%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.84",
%%%     date            = "20 March 2024",
%%%     time            = "07:43:07 MST",
%%%     filename        = "vldbe.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "https://www.math.utah.edu/~beebe",
%%%     checksum        = "27900 122414 630911 5957896",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "BibTeX; bibliography; Very Large Data Bases;
%%%                        Proceedings of the VLDB Endowment",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE bibliography of
%%%                        publications in the Proceedings of the VLDB
%%%                        Endowment (CODEN unknown, ISSN 2150-8097).
%%%
%%%                        The journal has a Web site at
%%%
%%%                            http://portal.acm.org/citation.cfm?id=J1174
%%%
%%%                        At version 1.84, the year coverage looked
%%%                        like this:
%%%
%%%                             2008 ( 169)    2014 ( 230)    2020 ( 221)
%%%                             2009 ( 167)    2015 ( 224)    2021 ( 345)
%%%                             2010 ( 193)    2016 ( 183)    2022 ( 309)
%%%                             2011 (  75)    2017 ( 207)    2023 ( 271)
%%%                             2012 ( 187)    2018 ( 100)
%%%                             2013 ( 238)    2019 ( 230)
%%%
%%%                             Article:       3349
%%%
%%%                             Total entries: 3349
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================
@Preamble{
    "\ifx \undefined \circled   \def \circled   #1{(#1)}\fi" #
    "\ifx \undefined \k \let \k = \c \fi" #
    "\ifx \undefined \ocirc  \def \ocirc #1{{\accent'27#1}}\fi" #
    "\ifx \undefined \pkg       \def \pkg       #1{{{\tt #1}}} \fi" #
    "\ifx \undefined \reg       \def \reg       {\circled{R}}\fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
@String{j-PROC-VLDB-ENDOWMENT = "Proceedings of the VLDB Endowment"}

%%% ====================================================================
%%% Bibliography entries, sorted in publication order:
@Article{Hill:2008:TMO,
  author =       "Mark D. Hill",
  title =        "Is transactional memory an oxymoron?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1--1",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453858",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zobel:2008:DSH,
  author =       "Justin Zobel",
  title =        "Databases and the silification of health",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "2--2",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453859",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Blott:2008:WWH,
  author =       "Stephen Blott and Roger Weber",
  title =        "What's wrong with high-dimensional similarity
                 search?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "3--3",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453861",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bruno:2008:CPD,
  author =       "Nicolas Bruno and Surajit Chaudhuri",
  title =        "Constrained physical design tuning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "4--15",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453863",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kementsietsidis:2008:SMQ,
  author =       "Anastasios Kementsietsidis and Frank Neven and Dieter
                 {Van de Craen} and Stijn Vansummeren",
  title =        "Scalable multi-query optimization for exploratory
                 queries over federated scientific databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "16--27",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453864",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{DeWitt:2008:CIC,
  author =       "David J. DeWitt and Erik Paulson and Eric Robinson and
                 Jeffrey Naughton and Joshua Royalty and Srinath Shankar
                 and Andrew Krioukov",
  title =        "{Clustera}: an integrated computation and data
                 management system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "28--41",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453865",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cheung:2008:PPE,
  author =       "Alvin Cheung and Samuel Madden",
  title =        "Performance profiling with {EndoScope}, an
                 acquisitional software monitoring framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "42--53",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453866",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bar-Yossef:2008:MSE,
  author =       "Ziv Bar-Yossef and Maxim Gurevich",
  title =        "Mining search engine query logs via suggestion
                 sampling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "54--65",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453868",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Akdere:2008:PBC,
  author =       "Mert Akdere and U{\u{g}}ur {\c{C}}etintemel and Nesime
                 Tatbul",
  title =        "Plan-based complex event detection across distributed
                 sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "66--77",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453869",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lachmann:2008:FRP,
  author =       "Alexander Lachmann and Mirek Riedewald",
  title =        "Finding relevant patterns in bursty sequences",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "78--89",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453870",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cheng:2008:CLW,
  author =       "Hao Cheng and Kien A. Hua and Khanh Vu",
  title =        "Constrained locally weighted clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "90--101",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453871",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hay:2008:RSR,
  author =       "Michael Hay and Gerome Miklau and David Jensen and Don
                 Towsley and Philipp Weis",
  title =        "Resisting structural re-identification in anonymized
                 social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "102--114",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453873",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Terrovitis:2008:PPA,
  author =       "Manolis Terrovitis and Nikos Mamoulis and Panos
                 Kalnis",
  title =        "Privacy-preserving anonymization of set-valued data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "115--125",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453874",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pang:2008:AQR,
  author =       "HweeHwa Pang and Kyriakos Mouratidis",
  title =        "Authenticating the query results of text search
                 engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "126--137",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453875",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kundu:2008:SST,
  author =       "Ashish Kundu and Elisa Bertino",
  title =        "Structural signatures for tree data structures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "138--150",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453876",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Roitman:2008:MDC,
  author =       "Haggai Roitman and David Carmel and Elad Yom-Tov",
  title =        "Maintaining dynamic channel profiles on the {Web}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "151--162",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453878",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2008:WDD,
  author =       "Fan Yang and Nitin Gupta and Chavdar Botev and
                 Elizabeth F. Churchill and George Levchenko and Jayavel
                 Shanmugasundaram",
  title =        "{WYSIWYG} development of data driven {Web}
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "163--175",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453879",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Baykan:2008:WPL,
  author =       "Eda Baykan and Monika Henzinger and Ingmar Weber",
  title =        "{Web} page language identification based on {URLs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "176--187",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453880",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2008:PQO,
  author =       "Wook-Shin Han and Wooseong Kwak and Jinsoo Lee and Guy
                 M. Lohman and Volker Markl",
  title =        "Parallelizing query optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "188--200",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453882",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hadjieleftheriou:2008:HSS,
  author =       "Marios Hadjieleftheriou and Xiaohui Yu and Nick Koudas
                 and Divesh Srivastava",
  title =        "Hashed samples: selectivity estimators for set
                 similarity selection queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "201--212",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453883",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cohen:2008:TEU,
  author =       "Edith Cohen and Haim Kaplan",
  title =        "Tighter estimation using bottom $k$ sketches",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "213--229",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453884",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alexe:2008:STB,
  author =       "Bogdan Alexe and Wang-Chiew Tan and Yannis
                 Velegrakis",
  title =        "{STBenchmark}: towards a benchmark for mapping
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "230--244",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453886",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Katsis:2008:ISR,
  author =       "Yannis Katsis and Alin Deutsch and Yannis
                 Papakonstantinou",
  title =        "Interactive source registration in community-oriented
                 information integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "245--259",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453887",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hernandez:2008:DED,
  author =       "Mauricio A. Hern{\'a}ndez and Paolo Papotti and
                 Wang-Chiew Tan",
  title =        "Data exchange with data-metadata translations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "260--273",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453888",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2008:OPN,
  author =       "Jin Li and Kristin Tufte and Vladislav Shkapenyuk and
                 Vassilis Papadimos and Theodore Johnson and David
                 Maier",
  title =        "Out-of-order processing: a new architecture for
                 high-performance stream systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "274--288",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453890",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2008:SET,
  author =       "Wook-Shin Han and Haifeng Jiang and Howard Ho and
                 Quanzhong Li",
  title =        "{StreamTX}: extracting tuples from streaming {XML}
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "289--300",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453891",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jin:2008:SWT,
  author =       "Cheqing Jin and Ke Yi and Lei Chen and Jeffrey Xu Yu
                 and Xuemin Lin",
  title =        "Sliding-window top-$k$ queries on uncertain streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "301--312",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453892",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koch:2008:CPD,
  author =       "Christoph Koch and Dan Olteanu",
  title =        "Conditioning probabilistic databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "313--325",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453894",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Beskales:2008:EST,
  author =       "George Beskales and Mohamed A. Soliman and Ihab F.
                 Ilyas",
  title =        "Efficient search for the top-$k$ probable nearest
                 neighbors in uncertain databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "326--339",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453895",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2008:BML,
  author =       "Daisy Zhe Wang and Eirinaios Michelakis and Minos
                 Garofalakis and Joseph M. Hellerstein",
  title =        "{BayesStore}: managing large, uncertain data
                 repositories with probabilistic graphical models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "340--351",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453896",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2008:TIT,
  author =       "Daniel Deutch and Tova Milo",
  title =        "Type inference and type checking for queries on
                 execution traces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "352--363",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453898",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 364--407:
%%% entries Shang:2008:TVH through Fan:2008:PFD
%%% (DOIs 10.1145/1453856.1453899--1453901).
@Article{Shang:2008:TVH,
  author =       "Haichuan Shang and Ying Zhang and Xuemin Lin and
                 Jeffrey Xu Yu",
  title =        "Taming verification hardness: an efficient algorithm
                 for testing subgraph isomorphism",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "364--375",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453899",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Golab:2008:GNO,
  author =       "Lukasz Golab and Howard Karloff and Flip Korn and
                 Divesh Srivastava and Bei Yu",
  title =        "On generating near-optimal tableaux for conditional
                 functional dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "376--390",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453900",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2008:PFD,
  author =       "Wenfei Fan and Shuai Ma and Yanli Hu and Jie Liu and
                 Yinghui Wu",
  title =        "Propagating functional dependencies with conditions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "391--407",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453901",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): surnames corrected to their hyphenated forms
%%% ("Garcia-Molina", "Chi-Chao Chang", per DBLP/ACM author records);
%%% the unhyphenated forms made BibTeX parse Last="Molina"/"Chang"
%%% with the remainder folded into the first name.  Citation key
%%% kept unchanged so existing \cite commands still resolve.
@Article{Antonellis:2008:SQR,
  author =       "Ioannis Antonellis and Hector Garcia-Molina and
                 Chi-Chao Chang",
  title =        "{Simrank++}: query rewriting through link analysis of
                 the click graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "408--421",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453903",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 422--489:
%%% entries Lizorkin:2008:AEO through Wang:2008:HBS
%%% (DOIs 10.1145/1453856.1453904--1453909).
%%% NOTE(review): "Hash-base" in Wang:2008:HBS matches the title as
%%% published; do not "correct" it to "Hash-based".
@Article{Lizorkin:2008:AEO,
  author =       "Dmitry Lizorkin and Pavel Velikhov and Maxim Grinev
                 and Denis Turdakov",
  title =        "Accuracy estimate and optimization techniques for
                 {SimRank} computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "422--433",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453904",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2008:EES,
  author =       "Badrish Chandramouli and Jun Yang",
  title =        "End-to-end support for joins in large-scale
                 publish\slash subscribe systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "434--450",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453905",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Machanavajjhala:2008:SRP,
  author =       "Ashwin Machanavajjhala and Erik Vee and Minos
                 Garofalakis and Jayavel Shanmugasundaram",
  title =        "Scalable ranked publish\slash subscribe",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "451--462",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453906",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Teubner:2008:DCF,
  author =       "Jens Teubner and Torsten Grust and Sebastian Maneth
                 and Sherif Sakr",
  title =        "Dependable cardinality forecasts for {XQuery}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "463--477",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453908",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2008:HBS,
  author =       "Hongzhi Wang and Jianzhong Li and Jizhou Luo and Hong
                 Gao",
  title =        "Hash-base subgraph query processing method for
                 graph-structured {XML} documents",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "478--489",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453909",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 490--549:
%%% entries Cohen:2008:GXS through Cafarella:2008:WEP
%%% (DOIs 10.1145/1453856.1453910--1453916).
@Article{Cohen:2008:GXS,
  author =       "Sara Cohen",
  title =        "Generating {XML} structure using examples and
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "490--501",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453910",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Holloway:2008:ROD,
  author =       "Allison L. Holloway and David J. DeWitt",
  title =        "Read-optimized databases, in depth",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "502--513",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453912",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koltsidas:2008:FSL,
  author =       "Ioannis Koltsidas and Stratis D. Viglas",
  title =        "Flashing up the storage layer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "514--525",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453913",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sears:2008:RCL,
  author =       "Russell Sears and Mark Callaghan and Eric Brewer",
  title =        "{Rose}: compressed, log-structured replication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "526--537",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453914",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cafarella:2008:WEP,
  author =       "Michael J. Cafarella and Alon Halevy and Daisy Zhe
                 Wang and Eugene Wu and Yang Zhang",
  title =        "{WebTables}: exploring the power of tables on the
                 {Web}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "538--549",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453916",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 550--609:
%%% entries Garrod:2008:SQR through Aguilera:2008:PSD
%%% (DOIs 10.1145/1453856.1453917--1453922).
@Article{Garrod:2008:SQR,
  author =       "Charles Garrod and Amit Manjhi and Anastasia Ailamaki
                 and Bruce Maggs and Todd Mowry and Christopher Olston
                 and Anthony Tomasic",
  title =        "Scalable query result caching for {Web} applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "550--561",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453917",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Braga:2008:OMD,
  author =       "Daniele Braga and Stefano Ceri and Florian Daniel and
                 Davide Martinenghi",
  title =        "Optimization of multi-domain queries on the {Web}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "562--573",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453918",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kwon:2008:FTS,
  author =       "YongChul Kwon and Magdalena Balazinska and Albert
                 Greenberg",
  title =        "Fault-tolerant stream processing using a distributed,
                 replicated file system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "574--585",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453920",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yeh:2008:LLW,
  author =       "Mi-Yen Yeh and Kun-Lung Wu and Philip S. Yu and
                 Ming-Syan Chen",
  title =        "{LeeWave}: level-wise distribution of wavelet
                 coefficients for processing $k$ {NN} queries over
                 distributed streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "586--597",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453921",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aguilera:2008:PSD,
  author =       "Marcos K. Aguilera and Wojciech Golab and Mehul A.
                 Shah",
  title =        "A practical scalable distributed {B-tree}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "598--609",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453922",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 610--671:
%%% entries Qiao:2008:MMS through Simitsis:2008:MCE
%%% (DOIs 10.1145/1453856.1453924--1453929).
@Article{Qiao:2008:MMS,
  author =       "Lin Qiao and Vijayshankar Raman and Frederick Reiss
                 and Peter J. Haas and Guy M. Lohman",
  title =        "Main-memory scan sharing for multi-core {CPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "610--621",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453924",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Johnson:2008:RWP,
  author =       "Ryan Johnson and Vijayshankar Raman and Richard Sidle
                 and Garret Swart",
  title =        "Row-wise parallel predicate evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "622--634",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453925",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Soundararajan:2008:DPC,
  author =       "Gokul Soundararajan and Jin Chen and Mohamed A. Sharaf
                 and Cristiana Amza",
  title =        "Dynamic partitioning of the cache hierarchy in shared
                 data centers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "635--646",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453926",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Neumann:2008:RRS,
  author =       "Thomas Neumann and Gerhard Weikum",
  title =        "{RDF-3X}: a {RISC}-style engine for {RDF}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "647--659",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453927",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Simitsis:2008:MCE,
  author =       "Alkis Simitsis and Akanksha Baid and Yannis Sismanis
                 and Berthold Reinwald",
  title =        "Multidimensional content {eXploration}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "660--671",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453929",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 672--709:
%%% entries Fontoura:2008:RTS through Jayapandian:2008:ACF
%%% (DOIs 10.1145/1453856.1453930--1453932).
@Article{Fontoura:2008:RTS,
  author =       "Marcus Fontoura and Vanja Josifovski and Ravi Kumar
                 and Christopher Olston and Andrew Tomkins and Sergei
                 Vassilvitskii",
  title =        "Relaxation in text search using taxonomies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "672--683",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453930",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nguyen:2008:LEF,
  author =       "Hoa Nguyen and Thanh Nguyen and Juliana Freire",
  title =        "Learning to extract form labels",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "684--694",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453931",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jayapandian:2008:ACF,
  author =       "Magesh Jayapandian and H. V. Jagadish",
  title =        "Automated creation of a forms-based database query
                 interface",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "695--709",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453932",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): surname corrected to hyphenated "Amer-Yahia" (per
%%% DBLP/ACM author records); the unhyphenated form made BibTeX parse
%%% Last="Yahia", First="Sihem Amer".  The citation key Yahia:2008:ENA
%%% is kept unchanged so existing \cite commands still resolve.
@Article{Yahia:2008:ENA,
  author =       "Sihem Amer-Yahia and Michael Benedikt and Laks V. S.
                 Lakshmanan and Julia Stoyanovich",
  title =        "Efficient network aware search in collaborative
                 tagging sites",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "710--721",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453934",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 722--784:
%%% entries Cheng:2008:CUD through Chai:2008:ARD
%%% (DOIs 10.1145/1453856.1453935--1453940).
@Article{Cheng:2008:CUD,
  author =       "Reynold Cheng and Jinchuan Chen and Xike Xie",
  title =        "Cleaning uncertain data with quality guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "722--735",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453935",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2008:PNA,
  author =       "Jiansheng Huang and Ting Chen and AnHai Doan and
                 Jeffrey F. Naughton",
  title =        "On the provenance of non-answers to queries over
                 extracted data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "736--747",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453936",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2008:DAP,
  author =       "Shenghuo Zhu and Tao Li and Zhiyuan Chen and Dingding
                 Wang and Yihong Gong",
  title =        "Dynamic active probing of helpdesk databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "748--760",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453937",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Curino:2008:GDS,
  author =       "Carlo A. Curino and Hyun J. Moon and Carlo Zaniolo",
  title =        "Graceful database schema evolution: the {PRISM}
                 workbench",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "761--772",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453939",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chai:2008:ARD,
  author =       "Xiaoyong Chai and Mayssam Sayyadian and AnHai Doan and
                 Arnon Rosenthal and Len Seligman",
  title =        "Analyzing and revising data integration schemas to
                 improve their matchability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "773--784",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453940",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Proc. VLDB Endowment 1(1), August 2008, pages 785--844:
%%% entries Talukdar:2008:LCD through Cormode:2008:ABG
%%% (DOIs 10.1145/1453856.1453941--1453947).
@Article{Talukdar:2008:LCD,
  author =       "Partha Pratim Talukdar and Marie Jacob and Muhammad
                 Salman Mehmood and Koby Crammer and Zachary G. Ives and
                 Fernando Pereira and Sudipto Guha",
  title =        "Learning to create data-integrating queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "785--796",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453941",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Re:2008:ALP,
  author =       "Christopher R{\'e} and Dan Suciu",
  title =        "Approximate lineage for probabilistic databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "797--808",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453943",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sen:2008:ESC,
  author =       "Prithviraj Sen and Amol Deshpande and Lise Getoor",
  title =        "Exploiting shared correlations in probabilistic
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "809--820",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453944",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rastogi:2008:ACU,
  author =       "Vibhor Rastogi and Dan Suciu and Evan Welbourne",
  title =        "Access control over uncertain data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "821--832",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453945",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cormode:2008:ABG,
  author =       "Graham Cormode and Divesh Srivastava and Ting Yu and
                 Qing Zhang",
  title =        "Anonymizing bipartite graph data using safe
                 groupings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "833--844",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453947",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): hyphenation restored in "Ada Wai-Chee Fu" and
%%% "Raymond Chi-Wing Wong" (per DBLP/ACM author records); without the
%%% hyphens BibTeX treats "Wai Chee"/"Chi Wing" as multi-word first
%%% names and abbreviates/sorts them incorrectly.  Citation key kept
%%% unchanged so existing \cite commands still resolve.
@Article{Bu:2008:PPS,
  author =       "Yingyi Bu and Ada Wai-Chee Fu and Raymond Chi-Wing
                 Wong and Lei Chen and Jiuyong Li",
  title =        "Privacy preserving serial data publishing by role
                 composition",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "845--856",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453948",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xiao:2008:OPQ,
  author =       "Xiaokui Xiao and Yufei Tao",
  title =        "Output perturbation with query relaxation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "857--869",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453949",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lomet:2008:TTI,
  author =       "David Lomet and Mingsheng Hong and Rimma Nehme and Rui
                 Zhang",
  title =        "Transaction time indexing with version compression",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "870--881",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453951",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Moon:2008:MQT,
  author =       "Hyun J. Moon and Carlo A. Curino and Alin Deutsch and
                 Chien-Yi Hou and Carlo Zaniolo",
  title =        "Managing and querying transaction-time databases under
                 schema evolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "882--895",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453952",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sherkat:2008:EST,
  author =       "Reza Sherkat and Davood Rafiei",
  title =        "On efficiently searching trajectories and archival
                 data for historical similarities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "896--908",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453953",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pu:2008:KQC,
  author =       "Ken Q. Pu and Xiaohui Yu",
  title =        "Keyword query cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "909--920",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453955",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2008:RIR,
  author =       "Ziyang Liu and Yi Chen",
  title =        "Reasoning and identifying relevant matches for {XML}
                 keyword search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "921--932",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453956",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xiao:2008:EJE,
  author =       "Chuan Xiao and Wei Wang and Xuemin Lin",
  title =        "{Ed-Join}: an efficient algorithm for similarity joins
                 with edit distance constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "933--944",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453957",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agrawal:2008:SAH,
  author =       "Sanjay Agrawal and Kaushik Chakrabarti and Surajit
                 Chaudhuri and Venkatesh Ganti",
  title =        "Scalable ad-hoc entity extraction from text
                 collections",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "945--957",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453958",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agrawal:2008:SSS,
  author =       "Parag Agrawal and Daniel Kifer and Christopher
                 Olston",
  title =        "Scheduling shared scans of large data files",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "958--969",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453960",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nath:2008:OMV,
  author =       "Suman Nath and Phillip B. Gibbons",
  title =        "Online maintenance of very large random samples on
                 flash storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "970--983",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453961",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ge:2008:SLA,
  author =       "Tingjian Ge and Stan Zdonik",
  title =        "A skip-list approach for efficiently processing
                 forecasting queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "984--995",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453962",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Phan:2008:RRF,
  author =       "Thomas Phan and Wen-Syan Li",
  title =        "A request-routing framework for {SOA}-based enterprise
                 computing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "996--1007",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453963",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Weiss:2008:HSI,
  author =       "Cathrin Weiss and Panagiotis Karras and Abraham
                 Bernstein",
  title =        "{Hexastore}: sextuple indexing for {Semantic Web} data
                 management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1008--1019",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453965",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shahabi:2008:ILS,
  author =       "Cyrus Shahabi and Lu-An Tang and Songhua Xing",
  title =        "Indexing land surface for efficient {kNN} query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1020--1031",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453966",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wong:2008:ESQ,
  author =       "Raymond Chi-Wing Wong and Ada Wai-Chee Fu and Jian Pei
                 and Yip Sing Ho and Tai Wong and Yubao Liu",
  title =        "Efficient skyline querying with variable user
                 preferences on nominal attributes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1032--1043",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453967",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guo:2008:ETP,
  author =       "Lin Guo and Sihem Amer-Yahia and Raghu Ramakrishnan
                 and Jayavel Shanmugasundaram and Utkarsh Srivastava and
                 Erik Vee",
  title =        "Efficient top-$k$ processing over query-dependent
                 functions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1044--1055",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453968",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2008:FER,
  author =       "Wei Wu and Fei Yang and Chee-Yong Chan and Kian-Lee
                 Tan",
  title =        "{FINCH}: evaluating reverse $k$-Nearest-Neighbor
                 queries on location data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1056--1067",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453970",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jeung:2008:DCT,
  author =       "Hoyoung Jeung and Man Lung Yiu and Xiaofang Zhou and
                 Christian S. Jensen and Heng Tao Shen",
  title =        "Discovery of convoys in trajectory databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1068--1080",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453971",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2008:TTC,
  author =       "Jae-Gil Lee and Jiawei Han and Xiaolei Li and Hector
                 Gonzalez",
  title =        "{TraClass}: trajectory classification using
                 hierarchical region-based and trajectory-based
                 clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1081--1094",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453972",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nutanong:2008:VDQ,
  author =       "Sarana Nutanong and Rui Zhang and Egemen Tanin and
                 Lars Kulik",
  title =        "The {V*-Diagram}: a query-dependent approach to moving
                 {KNN} queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1095--1106",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453973",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guravannavar:2008:RPB,
  author =       "Ravindra Guravannavar and S. Sudarshan",
  title =        "Rewriting procedures for batched bindings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1107--1123",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453975",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{D:2008:IRP,
  author =       "Harish D. and Pooja N. Darera and Jayant R. Haritsa",
  title =        "Identifying robust plans through plan diagram
                 reduction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1124--1140",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453976",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chaudhuri:2008:PYG,
  author =       "Surajit Chaudhuri and Vivek Narasayya and Ravi
                 Ramamurthy",
  title =        "A pay-as-you-go framework for query execution
                 feedback",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1141--1152",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453977",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Condie:2008:ERM,
  author =       "Tyson Condie and David Chu and Joseph M. Hellerstein
                 and Petros Maniatis",
  title =        "Evita raced: metacompilation for declarative
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1153--1165",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453978",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chiang:2008:DDQ,
  author =       "Fei Chiang and Ren{\'e}e J. Miller",
  title =        "Discovering data quality rules",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1166--1177",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453980",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2008:MNR,
  author =       "Xiang Zhang and Feng Pan and Wei Wang and Andrew
                 Nobel",
  title =        "Mining non-redundant high order correlations in binary
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1178--1188",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453981",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dalvi:2008:KSE,
  author =       "Bhavana Bharat Dalvi and Meghana Kshirsagar and S.
                 Sudarshan",
  title =        "Keyword search on external memory data graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1189--1204",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453982",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koltsidas:2008:SHD,
  author =       "Ioannis Koltsidas and Heiko M{\"u}ller and Stratis D.
                 Viglas",
  title =        "Sorting hierarchical data in external memory for
                 archiving",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "1",
  pages =        "1205--1216",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1453856.1453983",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:36 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Metwally:2008:SSP,
  author =       "Ahmed Metwally and Fatih Emek{\c{c}}i and Divyakant
                 Agrawal and Amr {El Abbadi}",
  title =        "{SLEUTH}: {Single-pubLisher attack dEtection Using
                 correlaTion Hunting}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1217--1228",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454161",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Poess:2008:ECK,
  author =       "Meikel Poess and Raghunath Othayoth Nambiar",
  title =        "Energy cost, the key challenge of today's data
                 centers: a power consumption analysis of {TPC}-{C}
                 results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1229--1240",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454162",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Madhavan:2008:GDW,
  author =       "Jayant Madhavan and David Ko and Lucja Kot and Vignesh
                 Ganapathy and Alex Rasmussen and Alon Halevy",
  title =        "{Google}'s {Deep Web} crawl",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1241--1252",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454163",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Weis:2008:ISD,
  author =       "Melanie Weis and Felix Naumann and Ulrich Jehle and
                 Jens Lufter and Holger Schuster",
  title =        "Industry-scale duplicate detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1253--1264",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454165",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chaiken:2008:SEE,
  author =       "Ronnie Chaiken and Bob Jenkins and Per-{\AA}ke Larson
                 and Bill Ramsey and Darren Shakib and Simon Weaver and
                 Jingren Zhou",
  title =        "{SCOPE}: easy and efficient parallel processing of
                 massive data sets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1265--1276",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454166",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cooper:2008:PYH,
  author =       "Brian F. Cooper and Raghu Ramakrishnan and Utkarsh
                 Srivastava and Adam Silberstein and Philip Bohannon and
                 Hans-Arno Jacobsen and Nick Puz and Daniel Weaver and
                 Ramana Yerneni",
  title =        "{PNUTS}: {Yahoo!}'s hosted data serving platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1277--1288",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454167",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Acharya:2008:RSF,
  author =       "Srini Acharya and Peter Carlin and Cesar
                 Galindo-Legaria and Krzysztof Kozielczyk and Pawel
                 Terlecki and Peter Zabback",
  title =        "Relational support for flexible schema scenarios",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1289--1300",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454169",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mukherjee:2008:OSS,
  author =       "Niloy Mukherjee and Bharath Aleti and Amit Ganesh and
                 Krishna Kunchithapadam and Scott Lynn and Sujatha
                 Muthulingam and Kam Shergill and Shaoyu Wang and Wei
                 Zhang",
  title =        "{Oracle SecureFiles System}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1301--1312",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454170",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chhugani:2008:EIS,
  author =       "Jatin Chhugani and Anthony D. Nguyen and Victor W. Lee
                 and William Macy and Mostafa Hagog and Yen-Kuang Chen
                 and Akram Baransi and Sanjeev Kumar and Pradeep Dubey",
  title =        "Efficient implementation of sorting on multi-core
                 {SIMD CPU} architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1313--1324",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454171",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dey:2008:EAQ,
  author =       "Atreyee Dey and Sourjya Bhaumik and Harish D. and
                 Jayant R. Haritsa",
  title =        "Efficiently approximating query optimizer plan
                 diagrams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1325--1336",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454173",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Slezak:2008:BAD,
  author =       "Dominik {\'S}l{\k{e}}zak and Jakub Wr{\'o}blewski and
                 Victoria Eastwood and Piotr Synak",
  title =        "{Brighthouse}: an analytic data warehouse for ad-hoc
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1337--1345",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454174",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ziauddin:2008:OPC,
  author =       "Mohamed Ziauddin and Dinesh Das and Hong Su and Yali
                 Zhu and Khaled Yagoub",
  title =        "Optimizer plan change management: improved stability
                 and performance in {Oracle} 11g",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1346--1355",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454175",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2008:TPX,
  author =       "Zhen Hua Liu and Sivasankaran Chandrasekar and Thomas
                 Baby and Hui J. Chang",
  title =        "Towards a physical {XML} independent {XQuery\slash
                 SQL\slash XML} engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1356--1367",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454177",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2008:CQP,
  author =       "Allison W. Lee and Mohamed Zait",
  title =        "Closing the query processing loop in {Oracle 11g}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1368--1378",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454178",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jain:2008:TSS,
  author =       "Namit Jain and Shailendra Mishra and Anand Srinivasan
                 and Johannes Gehrke and Jennifer Widom and Hari
                 Balakrishnan and U{\u{g}}ur {\c{C}}etintemel and Mitch
                 Cherniack and Richard Tibbetts and Stan Zdonik",
  title =        "Towards a streaming {SQL} standard",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1379--1390",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454179",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2008:ESG,
  author =       "Yu Huang and Ziyang Liu and Yi Chen",
  title =        "{eXtract}: a snippet generation system for {XML}
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1392--1395",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454181",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Terwilliger:2008:LIQ,
  author =       "James F. Terwilliger and Sergey Melnik and Philip A.
                 Bernstein",
  title =        "Language-integrated querying of {XML} data in {SQL}
                 server",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1396--1399",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454182",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mathis:2008:XXC,
  author =       "Christian Mathis and Andreas M. Weiner and Theo
                 H{\"a}rder and Caesar Ralf Franz Hoppen",
  title =        "{XTCcmp}: {XQuery} compilation on {XTC}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1400--1403",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454183",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tian:2008:PGG,
  author =       "Yuanyuan Tian and Jignesh M. Patel and Viji Nair and
                 Sebastian Martini and Matthias Kretzler",
  title =        "{Periscope\slash GQ}: a graph querying toolkit",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1404--1407",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454184",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Balmin:2008:SSS,
  author =       "Andrey Balmin and Latha Colby and Emiran Curtmola and
                 Quanzhong Li and Fatma {\"O}zcan and Sharath Srinivas
                 and Zografoula Vagena",
  title =        "{SEDA}: a system for search, exploration, discovery,
                 and analysis of {XML Data}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1408--1411",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454185",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Motahari:2008:PSD,
  author =       "Hamid Motahari and Boualem Benatallah and Regis
                 Saint-Paul and Fabio Casati and Periklis Andritsos",
  title =        "Process spaceship: discovering and exploring process
                 views from event logs in data spaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1412--1415",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454186",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lupu:2008:PPP,
  author =       "Mihai Lupu and Y. C. Tay",
  title =        "{P$^3$N}: profiling the potential of a peer-based
                 data management system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1416--1419",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454188",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tlili:2008:PLT,
  author =       "Mounir Tlili and W. Kokou Dedzoe and Esther Pacitti
                 and Patrick Valduriez and Reza Akbarinia and Pascal
                 Molli and G{\'e}r{\^o}me Canals and St{\'e}phane
                 Lauri{\`e}re",
  title =        "{P2P} logging and timestamping for reconciliation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1420--1423",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454189",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Luu:2008:ASP,
  author =       "Toan Luu and Gleb Skobeltsyn and Fabius Klemm and
                 Maroje Puh and Ivana Podnar {\v{Z}}arko and Martin Rajman and
                 Karl Aberer",
  title =        "{AlvisP2P}: scalable peer-to-peer text retrieval in a
                 structured {P2P} network",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1424--1427",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454190",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abiteboul:2008:WEP,
  author =       "S. Abiteboul and T. Allard and P. Chatalic and G.
                 Gardarin and A. Ghitescu and F. Goasdou{\'e} and I.
                 Manolescu and B. Nguyen and M. Ouazara and A. Somani
                 and N. Travers and G. Vasile and S. Zoupanos",
  title =        "{WebContent}: efficient {P2P Warehousing} of {Web}
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1428--1431",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454191",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jurczyk:2008:DED,
  author =       "Pawel Jurczyk and Li Xiong",
  title =        "{DObjects}: enabling distributed data services for
                 metacomputing platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1432--1435",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454192",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shao:2008:ETR,
  author =       "Qihong Shao and Yi Chen and Shu Tao and Xifeng Yan and
                 Nikos Anerousis",
  title =        "{EasyTicket}: a ticket routing recommendation engine
                 for enterprise problem resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1436--1439",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454193",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Duda:2008:ACI,
  author =       "Cristian Duda and Gianni Frey and Donald Kossmann and
                 Chong Zhou",
  title =        "{AJAXSearch}: crawling, indexing and searching {Web
                 2.0} applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1440--1443",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454195",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2008:MSH,
  author =       "Kun Liu and Evimaria Terzi and Tyrone Grandison",
  title =        "{ManyAspects}: a system for highlighting diverse
                 concepts in documents",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1444--1447",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454196",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Curtmola:2008:XDC,
  author =       "Emiran Curtmola and Alin Deutsch and Dionysios
                 Logothetis and K. K. Ramakrishnan and Divesh Srivastava
                 and Kenneth Yocum",
  title =        "{XTreeNet}: democratic community search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1448--1451",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454197",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2008:EVK,
  author =       "Guoliang Li and Jianhua Feng and Jianyong Wang and
                 Lizhu Zhou",
  title =        "An effective and versatile keyword search engine on
                 heterogeneous data sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1452--1455",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454198",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Baid:2008:DME,
  author =       "Akanksha Baid and Andrey Balmin and Heasoo Hwang and
                 Erik Nijkamp and Jun Rao and Berthold Reinwald and
                 Alkis Simitsis and Yannis Sismanis and Frank van Ham",
  title =        "{DBPubs}: multidimensional exploration of database
                 publications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1456--1459",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454199",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2008:SDQ,
  author =       "Wenfei Fan and Floris Geerts and Xibei Jia",
  title =        "{Semandaq}: a data quality system based on conditional
                 functional dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1460--1463",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454200",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Katsis:2008:RTI,
  author =       "Yannis Katsis and Alin Deutsch and Yannis
                 Papakonstantinou and Keliang Zhao",
  title =        "{RIDE}: a tool for interactive source registration in
                 community-oriented information integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1464--1467",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454202",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alexe:2008:CEM,
  author =       "Bogdan Alexe and Wang-Chiew Tan and Yannis
                 Velegrakis",
  title =        "Comparing and evaluating mapping systems with
                 {STBenchmark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1468--1471",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454203",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Logothetis:2008:AHD,
  author =       "Dionysios Logothetis and Kenneth Yocum",
  title =        "Ad-hoc data processing in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1472--1475",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454204",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Weigel:2008:LSC,
  author =       "Felix Weigel and Biswanath Panda and Mirek Riedewald
                 and Johannes Gehrke and Manuel Calimlim",
  title =        "Large-scale collaborative analysis and extraction of
                 {Web} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1476--1479",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454205",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Crecelius:2008:MSS,
  author =       "Tom Crecelius and Mouna Kacimi and Sebastian Michel
                 and Thomas Neumann and Josiane Xavier Parreira and Ralf
                 Schenkel and Gerhard Weikum",
  title =        "Making {SENSE}: socially enhanced search and
                 exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1480--1483",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454206",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2008:ASD,
  author =       "Wentian Lu and Gerome Miklau",
  title =        "{AuditGuard}: a system for database auditing under
                 retention restrictions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1484--1487",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454207",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hu:2008:QVQ,
  author =       "Ling Hu and Kenneth A. Ross and Yuan-Chi Chang and
                 Christian A. Lang and Donghui Zhang",
  title =        "{QueryScope}: visualizing queries for repeatable
                 database tuning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1488--1491",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454209",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hose:2008:WIT,
  author =       "Katja Hose and Daniel Klan and Matthias Marx and
                 Kai-Uwe Sattler",
  title =        "When is it time to rethink the aggregate configuration
                 of your {OLAP} server?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1492--1495",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454210",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kallman:2008:HSH,
  author =       "Robert Kallman and Hideaki Kimura and Jonathan Natkins
                 and Andrew Pavlo and Alexander Rasin and Stanley Zdonik
                 and Evan P. C. Jones and Samuel Madden and Michael
                 Stonebraker and Yang Zhang and John Hugg and Daniel J.
                 Abadi",
  title =        "{H-store}: a high-performance, distributed main memory
                 transaction processing system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1496--1499",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454211",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Records for Proceedings of the VLDB Endowment, volume 1, number 2
%%% (August 2008).  Each 2008 record below carries an ACM DOI; the
%%% CODEN value "????" is this file's placeholder for "unknown".
@Article{Perlman:2008:OIN,
  author =       "Eric Perlman and Randal Burns and Michael Kazhdan",
  title =        "Organizing and indexing non-convex regions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1500--1503",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454212",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Paquet:2008:CME,
  author =       "Eric Paquet and Herna L. Viktor",
  title =        "{Capri\slash MR}: exploring protein databases from a
                 structural and physicochemical point of view",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1504--1507",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454213",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guo:2008:CMM,
  author =       "Fan Guo and Lei Li and Christos Faloutsos and Eric P.
                 Xing",
  title =        "{C-DEM}: a multi-modal query system for {Drosophila
                 Embryo} databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1508--1511",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454214",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Milo:2008:QMD,
  author =       "Tova Milo and Daniel Deutch",
  title =        "Querying and monitoring distributed business
                 processes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1512--1515",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454216",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Franklin:2008:FTD,
  author =       "Michael Franklin and Alon Halevy and David Maier",
  title =        "A first tutorial on dataspaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1516--1517",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454217",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Franconi:2008:ODM,
  author =       "Enrico Franconi",
  title =        "Ontologies and databases: myths and challenges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1518--1519",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454218",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Balazinska:2008:SAP,
  author =       "Magdalena Balazinska and Christopher R{\'e} and Dan
                 Suciu",
  title =        "Systems aspects of probabilistic data management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1520--1521",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454219",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2008:RIC,
  author =       "Wenfei Fan and Floris Geerts and Xibei Jia",
  title =        "A revival of integrity constraints for data cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1522--1523",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454220",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Moro:2008:XSS,
  author =       "Mirella M. Moro and Zografoula Vagena and Vassilis J.
                 Tsotras",
  title =        "{XML Structural Summaries}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1524--1525",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454221",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sharaf:2008:SCQ,
  author =       "Mohamed A. Sharaf and Alexandros Labrinidis and Panos
                 K. Chrysanthis",
  title =        "Scheduling continuous queries in data stream
                 management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1526--1527",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454222",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 1(2), August 2008 (continued).
@Article{Kriegel:2008:DCM,
  author =       "Hans-Peter Kriegel and Peer Kr{\"o}ger and Arthur
                 Zimek",
  title =        "Detecting clusters in moderate-to-high dimensional
                 data: subspace clustering, pattern-based clustering,
                 and correlation clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1528--1529",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454223",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cormode:2008:FFI,
  author =       "Graham Cormode and Marios Hadjieleftheriou",
  title =        "Finding frequent items in data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1530--1541",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454225",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2008:QMT,
  author =       "Hui Ding and Goce Trajcevski and Peter Scheuermann and
                 Xiaoyue Wang and Eamonn Keogh",
  title =        "Querying and mining of time series data: experimental
                 comparison of representations and distance measures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1542--1552",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454226",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): "Goncalves" is plausibly "Gon{\c{c}}alves" (Portuguese
%%% cedilla); DBLP/ACM list the unaccented form, so it is kept as is ---
%%% verify against the author's record before changing.
@Article{Sidirourgos:2008:CSS,
  author =       "Lefteris Sidirourgos and Romulo Goncalves and Martin
                 Kersten and Niels Nes and Stefan Manegold",
  title =        "Column-store support for {RDF} data management: not
                 all swans are white",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1553--1563",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454227",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sans:2008:PBN,
  author =       "Virginie Sans and Dominique Laurent",
  title =        "Prefix based numbering schemes for {XML}: techniques,
                 applications and performances",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1564--1573",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454228",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2008:BEM,
  author =       "Su Chen and Christian S. Jensen and Dan Lin",
  title =        "A benchmark for evaluating moving object indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1574--1585",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454229",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dittrich:2008:DRM,
  author =       "Jens Dittrich and Lukas Blunschi and Marcos Antonio
                 Vaz Salles",
  title =        "Dwarfs in the rearview mirror: how big are they
                 really?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1586--1597",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454230",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shao:2008:CTE,
  author =       "Jie Shao and Heng Tao Shen and Xiaofang Zhou",
  title =        "Challenges and techniques for effective and efficient
                 similarity search in large video databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1598--1603",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454232",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hopfgartner:2008:SIM,
  author =       "Frank Hopfgartner",
  title =        "Studying interaction methodologies in video
                 retrieval",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1604--1608",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454233",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lo:2008:MPR,
  author =       "David Lo and Siau-Cheng Khoo",
  title =        "Mining patterns and rules for software specification
                 discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1609--1616",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454234",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): "Saltenis" is often typeset "{\v{S}}altenis"
%%% (Lithuanian hacek); DBLP/ACM list the unaccented form, so it is
%%% kept as is --- verify against the author's record before changing.
@Article{Biveinis:2008:TEM,
  author =       "Laurynas Biveinis and Simonas Saltenis",
  title =        "Towards efficient main-memory use for optimum tree
                 index update",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1617--1622",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454236",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 1(2), August 2008 (continued).
@Article{Holupirek:2008:IFT,
  author =       "Alexander Holupirek and Marc H. Scholl",
  title =        "Implementing filesystems by tree-aware {DBMSs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1623--1630",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454237",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Avanes:2008:AWS,
  author =       "Artin Avanes and Johann-Christoph Freytag",
  title =        "Adaptive workflow scheduling under resource allocation
                 constraints and network dynamics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1631--1637",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454238",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zerr:2008:PPD,
  author =       "Sergej Zerr and Wolfgang Nejdl",
  title =        "Privacy preserving document indexing infrastructure
                 for a distributed environment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1638--1643",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454240",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Miao:2008:GTG,
  author =       "Jiajia Miao",
  title =        "{GS-TMS}: a global stream-based threat monitor
                 system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1644--1651",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454241",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kharlamov:2008:III,
  author =       "Evgeny Kharlamov and Werner Nutt",
  title =        "Incompleteness in information integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1652--1658",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454242",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2008:QWB,
  author =       "Daniel Deutch and Tova Milo",
  title =        "Querying {Web}-based applications under models of
                 uncertainty",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1659--1665",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454244",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Silvasti:2008:XDF,
  author =       "Panu Silvasti and Seppo Sippu and Eljas
                 Soisalon-Soininen",
  title =        "{XML}-document-filtering automaton",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1666--1671",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454245",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Scholl:2008:CDD,
  author =       "Tobias Scholl and Alfons Kemper",
  title =        "Community-driven data grids",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "1",
  number =       "2",
  pages =        "1672--1677",
  month =        aug,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1454159.1454246",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:44 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Records for Proceedings of the VLDB Endowment, volume 2, number 1
%%% (August 2009).  NOTE(review): these records carry no DOI field,
%%% although the 2008 records in this file do; the DOIs (PVLDB papers
%%% appear to use the 10.14778 prefix --- confirm) should be looked up
%%% on the ACM Digital Library and added.
@Article{Gatterbauer:2009:BIA,
  author =       "Wolfgang Gatterbauer and Magdalena Balazinska and
                 Nodira Khoussainova and Dan Suciu",
  title =        "Believe it or not: adding belief annotations to
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1--12",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2009:SSB,
  author =       "Zhenjie Zhang and Beng Chin Ooi and Srinivasan
                 Parthasarathy and Anthony K. H. Tung",
  title =        "Similarity search on {Bregman} divergence: towards
                 non-metric indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "13--24",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zeng:2009:CSA,
  author =       "Zhiping Zeng and Anthony K. H. Tung and Jianyong Wang
                 and Jianhua Feng and Lizhu Zhou",
  title =        "Comparing stars: on approximating graph edit
                 distance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "25--36",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Whang:2009:IBE,
  author =       "Steven Euijong Whang and Hector Garcia-Molina and Chad
                 Brower and Jayavel Shanmugasundaram and Sergei
                 Vassilvitskii and Erik Vee and Ramana Yerneni",
  title =        "Indexing {Boolean} expressions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "37--48",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2009:SDS,
  author =       "Yongluan Zhou and Ali Salehi and Karl Aberer",
  title =        "Scalable delivery of stream query result",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "49--60",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Benedikt:2009:SBI,
  author =       "Michael Benedikt and James Cheney",
  title =        "Schema-based independence analysis for {XML} updates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "61--72",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nehme:2009:TSD,
  author =       "Rimma V. Nehme and Elke A. Rundensteiner and Elisa
                 Bertino",
  title =        "Tagging stream data for rich real-time services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "73--84",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}
}

%%% PVLDB 2(1), August 2009 (continued).  NOTE(review): these records
%%% carry no DOI field; look up the DOIs on the ACM Digital Library and
%%% add them.
@Article{Sarma:2009:RMP,
  author =       "Atish Das Sarma and Ashwin Lall and Danupon Nanongkai
                 and Jun Xu",
  title =        "Randomized multi-pass streaming skyline algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "85--96",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Reeves:2009:MMT,
  author =       "Galen Reeves and Jie Liu and Suman Nath and Feng
                 Zhao",
  title =        "Managing massive time series streams with multi-scale
                 compressed trickles",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "97--108",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2009:PAM,
  author =       "Tianyi Wu and Dong Xin and Qiaozhu Mei and Jiawei
                 Han",
  title =        "Promotion analysis in multi-dimensional space",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "109--120",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sarkas:2009:MDK,
  author =       "Nikos Sarkas and Nilesh Bansal and Gautam Das and Nick
                 Koudas",
  title =        "Measure-driven keyword-query expansion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "121--132",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2009:UTD,
  author =       "Bin Liu and H. V. Jagadish",
  title =        "Using trees to depict a forest",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "133--144",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elmeleegy:2009:OPW,
  author =       "Hazem Elmeleegy and Ahmed K. Elmagarmid and Emmanuel
                 Cecchet and Walid G. Aref and Willy Zwaenepoel",
  title =        "Online piece-wise linear approximation of numerical
                 streams with precision guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "145--156",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Stern:2009:WTE,
  author =       "Mirco Stern and Erik Buchmann and Klemens B{\"o}hm",
  title =        "A wavelet transform for efficient consolidation of
                 sensor relations with quality guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "157--168",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}
}

@Article{Yu:2009:EAQ,
  author =       "Liu Yu and Jianzhong Li and Hong Gao and Xiaolin
                 Fang",
  title =        "Enabling $ \epsilon $-approximate querying in sensor
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "169--180",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nandi:2009:HUS,
  author =       "Arnab Nandi and Philip A. Bernstein",
  title =        "{HAMSTER}: using search clicklogs for schema and
                 taxonomy matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "181--192",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kot:2009:CUE,
  author =       "Lucja Kot and Christoph Koch",
  title =        "Cooperative update exchange in the {Youtopia} system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "193--204",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papapetrou:2009:RBA,
  author =       "Panagiotis Papapetrou and Vassilis Athitsos and George
                 Kollios and Dimitrios Gunopulos",
  title =        "Reference-based alignment in large sequence
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "205--216",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Das:2009:TCM,
  author =       "Sudipto Das and Shyam Antony and Divyakant Agrawal and
                 Amr {El Abbadi}",
  title =        "Thread cooperation in multicore architectures for
                 frequency counting over multiple data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "217--228",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mueller:2009:SWQ,
  author =       "Ren{\'e} M{\"u}ller and Jens Teubner and Gustavo Alonso",
  title =        "Streams on wires: a query compiler for {FPGAs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "229--240",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2009:FPD,
  author =       "Badrish Chandramouli and Jonathan Goldstein and David
                 Maier",
  title =        "On-the-fly progress detection in iterative stream
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "241--252",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kraska:2009:CRC,
  author =       "Tim Kraska and Martin Hentschel and Gustavo Alonso and
                 Donald Kossmann",
  title =        "Consistency rationing in the cloud: pay only when it
                 matters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "253--264",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lomet:2009:LKR,
  author =       "David Lomet and Mohamed F. Mokbel",
  title =        "Locking key ranges with unbundled transaction
                 services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "265--276",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Candea:2009:SPJ,
  author =       "George Candea and Neoklis Polyzotis and Radek
                 Vingralek",
  title =        "A scalable, predictable join operator for highly
                 concurrent data warehouses",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "277--288",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gupta:2009:ATA,
  author =       "Rahul Gupta and Sunita Sarawagi",
  title =        "Answering table augmentation queries from unstructured
                 lists on the {Web}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "289--300",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cautis:2009:ERX,
  author =       "Bogdan Cautis and Alin Deutsch and Nicola Onose and
                 Vasilis Vassalos",
  title =        "Efficient rewriting of {XPath} queries using {Query
                 Set Specifications}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "301--312",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2009:SSR,
  author =       "Ziyang Liu and Peng Sun and Yi Chen",
  title =        "Structured search result differentiation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "313--324",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dragut:2009:HAM,
  author =       "Eduard C. Dragut and Thomas Kabisch and Clement Yu and
                 Ulf Leser",
  title =        "A hierarchical approach to model {Web} query
                 interfaces for {Web} source integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "325--336",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cong:2009:ERT,
  author =       "Gao Cong and Christian S. Jensen and Dingming Wu",
  title =        "Efficient retrieval of the top-$k$ most relevant
                 spatial {Web} objects",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "337--348",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dragut:2009:SWR,
  author =       "Eduard Dragut and Fang Fang and Prasad Sistla and
                 Clement Yu and Weiyi Meng",
  title =        "Stop word and related problems in {Web} interface
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "349--360",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agrawal:2009:LAT,
  author =       "Devesh Agrawal and Deepak Ganesan and Ramesh Sitaraman
                 and Yanlei Diao and Shashi Singh",
  title =        "Lazy-Adaptive {Tree}: an optimized index structure for
                 flash devices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "361--372",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2009:MDM,
  author =       "Rubao Lee and Xiaoning Ding and Feng Chen and Qingda
                 Lu and Xiaodong Zhang",
  title =        "{MCC-DB}: minimizing cache conflicts in multi-core
                 processors for databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "373--384",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Willhalm:2009:SSU,
  author =       "Thomas Willhalm and Nicolae Popovici and Yazan Boshmaf
                 and Hasso Plattner and Alexander Zeier and Jan
                 Schaffner",
  title =        "{SIMD-scan}: ultra fast in-memory table scan using
                 on-chip vector processing units",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "385--394",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chaudhuri:2009:MDC,
  author =       "Surajit Chaudhuri and Venkatesh Ganti and Dong Xin",
  title =        "Mining document collections to facilitate accurate
                 approximate entity matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "395--406",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2009:RAR,
  author =       "Wenfei Fan and Xibei Jia and Jianzhong Li and Shuai
                 Ma",
  title =        "Reasoning about record matching rules",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "407--418",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dobra:2009:TCE,
  author =       "Alin Dobra and Chris Jermaine and Florin Rusu and Fei
                 Xu",
  title =        "Turbo-charging estimate convergence in {DBO}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "419--430",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cohen:2009:CSA,
  author =       "Edith Cohen and Nick Duffield and Haim Kaplan and
                 Carsten Lund and Mikkel Thorup",
  title =        "Composable, scalable, and accurate weight
                 summarization of unaggregated data sets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "431--442",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2009:DOA,
  author =       "Sai Wu and Shouxu Jiang and Beng Chin Ooi and Kian-Lee
                 Tan",
  title =        "Distributed online aggregations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "443--454",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koloniari:2009:RBC,
  author =       "Georgia Koloniari and Evaggelia Pitoura",
  title =        "A recall-based cluster formation game in peer-to-peer
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "455--466",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fekete:2009:QIA,
  author =       "Alan Fekete and Shirley N. Goldrei and Jorge {P{\'e}rez
                 Asenjo}",
  title =        "Quantifying isolation anomalies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "467--478",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Johnson:2009:IOS,
  author =       "Ryan Johnson and Ippokratis Pandis and Anastasia
                 Ailamaki",
  title =        "Improving {OLTP} scalability using speculative lock
                 inheritance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "479--489",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sears:2009:SBR,
  author =       "Russell Sears and Eric Brewer",
  title =        "Segment-based recovery: write-ahead logging
                 revisited",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "490--501",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2009:UAR,
  author =       "Jian Li and Barna Saha and Amol Deshpande",
  title =        "A unified approach to ranking in probabilistic
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "502--513",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arasu:2009:LST,
  author =       "Arvind Arasu and Surajit Chaudhuri and Raghav
                 Kaushik",
  title =        "Learning string transformations from examples",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "514--525",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cormode:2009:PHP,
  author =       "Graham Cormode and Antonios Deligiannakis and Minos
                 Garofalakis and Andrew McGregor",
  title =        "Probabilistic histograms for probabilistic data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "526--537",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Greenshpan:2009:AM,
  author =       "Ohad Greenshpan and Tova Milo and Neoklis Polyzotis",
  title =        "Autocompletion for mashups",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "538--549",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2009:ICD,
  author =       "Xin Luna Dong and Laure Berti-{\'E}quille and Divesh
                 Srivastava",
  title =        "Integrating conflicting data: the role of source
                 dependence",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "550--561",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2009:TDC,
  author =       "Xin Luna Dong and Laure Berti-{\'E}quille and Divesh
                 Srivastava",
  title =        "Truth discovery and copying detection in a dynamic
                 world",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "562--573",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Golab:2009:SD,
  author =       "Lukasz Golab and Howard Karloff and Flip Korn and
                 Avishek Saha and Divesh Srivastava",
  title =        "Sequential dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "574--585",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Denev:2009:SFQ,
  author =       "Dimitar Denev and Arturas Mazeika and Marc Spaniol and
                 Gerhard Weikum",
  title =        "{SHARC}: framework for quality-conscious {Web}
                 archiving",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "586--597",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Beskales:2009:MQP,
  author =       "George Beskales and Mohamed A. Soliman and Ihab F.
                 Ilyas and Shai Ben-David",
  title =        "Modeling and querying possible repairs in duplicate
                 detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "598--609",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mindolin:2009:DRI,
  author =       "Denis Mindolin and Jan Chomicki",
  title =        "Discovering relative importance of skyline
                 attributes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "610--621",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kim:2009:PDB,
  author =       "Min-Soo Kim and Jiawei Han",
  title =        "A particle-and-density based evolutionary clustering
                 method for dynamic networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "622--633",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2009:SRD,
  author =       "Xiaoyan Yang and Cecilia M. Procopiuc and Divesh
                 Srivastava",
  title =        "Summarizing relational databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "634--645",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cohen:2009:CWS,
  author =       "Edith Cohen and Haim Kaplan and Subhabrata Sen",
  title =        "Coordinated weighted sampling for estimating
                 aggregates over multiple weight assignments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "646--657",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2009:PLB,
  author =       "Hongrae Lee and Raymond T. Ng and Kyuseok Shim",
  title =        "Power-law based estimation of set similarity join
                 size",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "658--669",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Karras:2009:OSL,
  author =       "Panagiotis Karras",
  title =        "Optimality and scalability in lattice histogram
                 construction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "670--681",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vigfusson:2009:APD,
  author =       "Ymir Vigfusson and Adam Silberstein and Brian F.
                 Cooper and Rodrigo Fonseca",
  title =        "Adaptively parallelizing distributed range queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "682--693",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tatikonda:2009:MTS,
  author =       "Shirish Tatikonda and Srinivasan Parthasarathy",
  title =        "Mining tree-structured data on multicore systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "694--705",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Unterbrunner:2009:PPU,
  author =       "P. Unterbrunner and G. Giannikis and G. Alonso and D.
                 Fauser and D. Kossmann",
  title =        "Predictable performance for unpredictable workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "706--717",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2009:GCB,
  author =       "Yang Zhou and Hong Cheng and Jeffrey Xu Yu",
  title =        "Graph clustering based on structural\slash attribute
                 similarities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "718--729",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{AlHasan:2009:OSS,
  author =       "Mohammad {Al Hasan} and Mohammed J. Zaki",
  title =        "Output space sampling for graph patterns",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "730--741",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2009:MGP,
  author =       "Chen Chen and Cindy X. Lin and Matt Fredrikson and
                 Mihai Christodorescu and Xifeng Yan and Jiawei Han",
  title =        "Mining graph patterns efficiently via randomized
                 summaries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "742--753",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amer-Yahia:2009:GRS,
  author =       "Sihem Amer-Yahia and Senjuti Basu Roy and Ashish
                 Chawlat and Gautam Das and Cong Yu",
  title =        "Group recommendation: semantics and efficiency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "754--765",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bhagat:2009:CBG,
  author =       "Smriti Bhagat and Graham Cormode and Balachander
                 Krishnamurthy and Divesh Srivastava",
  title =        "Class-based graph anonymization for social network
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "766--777",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sarkas:2009:ISS,
  author =       "Nikos Sarkas and Gautam Das and Nick Koudas",
  title =        "Improved search for socially annotated data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "778--789",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Machanavajjhala:2009:DPA,
  author =       "Ashwin Machanavajjhala and Johannes Gehrke and
                 Michaela G{\"o}tz",
  title =        "Data publishing against realistic adversaries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "790--801",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pang:2009:SVO,
  author =       "HweeHwa Pang and Jilian Zhang and Kyriakos
                 Mouratidis",
  title =        "Scalable verification for outsourced dynamic
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "802--813",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xiao:2009:ORP,
  author =       "Xiaokui Xiao and Yufei Tao and Minghua Chen",
  title =        "Optimal random perturbation at multiple privacy
                 levels",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "814--825",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Assent:2009:ADE,
  author =       "Ira Assent and Marc Wichterich and Ralph Krieger and
                 Hardy Kremer and Thomas Seidl",
  title =        "Anticipatory {DTW} for efficient similarity search in
                 time series databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "826--837",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tsirogiannis:2009:IPL,
  author =       "Dimitris Tsirogiannis and Sudipto Guha and Nick
                 Koudas",
  title =        "Improving the performance of list intersection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "838--849",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kaushik:2009:CHP,
  author =       "Raghav Kaushik and Dan Suciu",
  title =        "Consistent histograms in the presence of distinct
                 value counts",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "850--861",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aggarwal:2009:GCI,
  author =       "Charu Aggarwal and Yan Xie and Philip S. Yu",
  title =        "{GConnect}: a connectivity index for massive
                 disk-resident graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "862--873",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2009:SES,
  author =       "Di Yang and Elke A. Rundensteiner and Matthew O.
                 Ward",
  title =        "A shared execution strategy for multiple pattern
                 mining requests over streaming data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "874--885",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zou:2009:DJP,
  author =       "Lei Zou and Lei Chen and M. Tamer {\"O}zsu",
  title =        "Distance-join: pattern match query in a large graph
                 database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "886--897",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wan:2009:CCP,
  author =       "Qian Wan and Raymond Chi-Wing Wong and Ihab F. Ilyas
                 and M. Tamer {\"O}zsu and Yu Peng",
  title =        "Creating competitive products",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "898--909",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mueller:2009:DPF,
  author =       "Rene Mueller and Jens Teubner and Gustavo Alonso",
  title =        "Data processing on {FPGAs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "910--921",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abouzeid:2009:HAH,
  author =       "Azza Abouzeid and Kamil Bajda-Pawlikowski and Daniel
                 Abadi and Avi Silberschatz and Alexander Rasin",
  title =        "{HadoopDB}: an architectural hybrid of {MapReduce} and
                 {DBMS} technologies for analytical workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "922--933",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2009:ASV,
  author =       "Yeye He and Jeffrey F. Naughton",
  title =        "Anonymization of set-valued data via top-down, local
                 generalization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "934--945",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zou:2009:AGF,
  author =       "Lei Zou and Lei Chen and M. Tamer {\"O}zsu",
  title =        "$k$-automorphism: a general framework for privacy
                 preserving network publication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "946--957",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koudas:2009:DBM,
  author =       "Nick Koudas and Divesh Srivastava and Ting Yu and Qing
                 Zhang",
  title =        "Distribution based microdata anonymization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "958--969",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Meier:2009:CTB,
  author =       "Michael Meier and Michael Schmidt and Georg Lausen",
  title =        "On chase termination beyond stratification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "970--981",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Moerkotte:2009:PBP,
  author =       "Guido Moerkotte and Thomas Neumann and Gabriele
                 Steidl",
  title =        "Preventing bad plans by bounding the impact of
                 cardinality estimation errors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "982--993",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chaudhuri:2009:ECQ,
  author =       "Surajit Chaudhuri and Vivek Narasayya and Ravi
                 Ramamurthy",
  title =        "Exact cardinality query optimization for optimizer
                 testing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "994--1005",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{tenCate:2009:LSM,
  author =       "Balder ten Cate and Laura Chiticariu and Phokion
                 Kolaitis and Wang-Chiew Tan",
  title =        "Laconic schema mappings: computing the core with {SQL}
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1006--1017",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arenas:2009:ISM,
  author =       "Marcelo Arenas and Jorge P{\'e}rez and Juan Reutter
                 and Cristian Riveros",
  title =        "Inverting schema mappings: bridging the gap between
                 theory and practice",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1018--1029",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Terwilliger:2009:FFF,
  author =       "James F. Terwilliger and Philip A. Bernstein and
                 Sergey Melnik",
  title =        "Full-fidelity flexible object-oriented {XML} access",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1030--1041",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2009:PAM,
  author =       "Ting Wang and Ling Liu",
  title =        "Privacy-aware mobile services over road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1042--1053",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{U:2009:FAA,
  author =       "Leong Hou U. and Nikos Mamoulis and Kyriakos
                 Mouratidis",
  title =        "A fair assignment algorithm for multiple preference
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1054--1065",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mishima:2009:PED,
  author =       "Takeshi Mishima and Hiroshi Nakamura",
  title =        "{Pangea}: an eager database replication middleware
                 guaranteeing snapshot isolation without modification of
                 database servers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1066--1077",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elmeleegy:2009:HRT,
  author =       "Hazem Elmeleegy and Jayant Madhavan and Alon Halevy",
  title =        "Harvesting relational tables from lists on the web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1078--1089",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cafarella:2009:DIR,
  author =       "Michael J. Cafarella and Alon Halevy and Nodira
                 Khoussainova",
  title =        "Data integration for the relational web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1090--1101",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gottlob:2009:NOS,
  author =       "Georg Gottlob and Reinhard Pichler and Vadim
                 Savenkov",
  title =        "Normalization and optimization of schema mappings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1102--1113",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xing:2009:CMN,
  author =       "Songhua Xing and Cyrus Shahabi and Bei Pan",
  title =        "Continuous monitoring of nearest neighbors on land
                 surface",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1114--1125",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wong:2009:EMM,
  author =       "Raymond Chi-Wing Wong and M. Tamer {\"O}zsu and Philip
                 S. Yu and Ada Wai-Chee Fu and Lian Liu",
  title =        "Efficient method for maximizing bichromatic reverse
                 nearest neighbor",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1126--1137",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cheema:2009:LUE,
  author =       "Muhammad Aamir Cheema and Xuemin Lin and Ying Zhang
                 and Wei Wang and Wenjie Zhang",
  title =        "Lazy updates: an efficient technique to continuously
                 monitoring reverse {kNN}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1138--1149",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2009:NMM,
  author =       "Ling Chen and Sourav S. Bhowmick and Wolfgang Nejdl",
  title =        "{NEAR-Miner}: mining evolution associations of {Web}
                 site directories for efficient maintenance of {Web}
                 archives",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1150--1161",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wong:2009:AEO,
  author =       "W. K. Wong and David W. Cheung and Edward Hung and Ben
                 Kao and Nikos Mamoulis",
  title =        "An audit environment for outsourcing of frequent
                 itemset mining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1162--1173",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mozafari:2009:PNB,
  author =       "Barzan Mozafari and Carlo Zaniolo",
  title =        "Publishing naive {Bayesian} classifiers: privacy
                 without accuracy loss",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1174--1185",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tzoumas:2009:WAI,
  author =       "Kostas Tzoumas and Man Lung Yiu and Christian S.
                 Jensen",
  title =        "Workload-aware indexing of continuously moving
                 objects",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1186--1197",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2009:EIU,
  author =       "Meihui Zhang and Su Chen and Christian S. Jensen and
                 Beng Chin Ooi and Zhenjie Zhang",
  title =        "Effectively indexing uncertain moving objects for
                 predictive queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1198--1209",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sankaranarayanan:2009:POS,
  author =       "Jagan Sankaranarayanan and Hanan Samet and Houman
                 Alborzi",
  title =        "Path oracles for spatial networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1210--1221",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kimura:2009:CMC,
  author =       "Hideaki Kimura and George Huo and Alexander Rasin and
                 Samuel Madden and Stanley B. Zdonik",
  title =        "Correlation maps: a compressed access method for
                 exploiting soft functional dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1222--1233",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schnaitter:2009:IIP,
  author =       "Karl Schnaitter and Neoklis Polyzotis and Lise
                 Getoor",
  title =        "Index interactions in physical design tuning:
                 modeling, analysis, and applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1234--1245",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Duan:2009:TDC,
  author =       "Songyun Duan and Vamsidhar Thummala and Shivnath
                 Babu",
  title =        "Tuning database configuration parameters with
                 {iTuned}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1246--1257",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Salles:2009:ECR,
  author =       "Marcos Vaz Salles and Tuan Cao and Benjamin Sowell and
                 Alan Demers and Johannes Gehrke and Christoph Koch and
                 Walker White",
  title =        "An evaluation of checkpoint recovery for massively
                 multiplayer online games",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1258--1269",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Muller:2009:ECS,
  author =       "Emmanuel M{\"u}ller and Stephan G{\"u}nnemann and Ira
                 Assent and Thomas Seidl",
  title =        "Evaluating clustering in subspace projections of high
                 dimensional data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1270--1281",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hassanzadeh:2009:FEC,
  author =       "Oktie Hassanzadeh and Fei Chiang and Hyun Chul Lee and
                 Ren{\'e}e J. Miller",
  title =        "Framework for evaluating clustering algorithms in
                 duplicate detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "1",
  pages =        "1282--1293",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:50 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guo:2009:DMM,
  author =       "Hongfei Guo and Dan Jones and Jennifer Beckmann and
                 Praveen Seshadri",
  title =        "Declarative management in {Microsoft SQL} server",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1294--1305",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{El-Helw:2009:SRS,
  author =       "Amr El-Helw and Ihab F. Ilyas and Calisto Zuzarte",
  title =        "{StatAdvisor}: recommending statistical views",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1306--1317",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Canim:2009:OPA,
  author =       "Mustafa Canim and George A. Mihaila and Bishwaranjan
                 Bhattacharjee and Kenneth A. Ross and Christian A.
                 Lang",
  title =        "An object placement advisor for {DB2} using solid
                 state storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1318--1329",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bhide:2009:XXP,
  author =       "Manish Bhide and Manoj K. Agarwal and Amir Bar-Or and
                 Sriram Padmanabhan and Srinivas K. Mittapalli and
                 Girish Venkatachaliah",
  title =        "{XPEDIA}: {XML} processing for data integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1330--1341",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bamford:2009:XR,
  author =       "Roger Bamford and Vinayak Borkar and Matthias Brantner
                 and Peter M. Fischer and Daniela Florescu and David
                 Graf and Donald Kossmann and Tim Kraska and Dan Muresan
                 and Sorin Nasoi and Markos Zacharioudakis",
  title =        "{XQuery} reloaded",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1342--1353",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2009:BXS,
  author =       "Ning Zhang and Nipun Agarwal and Sivasankaran
                 Chandrasekar and Sam Idicula and Vijay Medi and Sabina
                 Petride and Balasubramanyam Sthanikam",
  title =        "Binary {XML} storage and query processing in {Oracle
                 11g}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1354--1365",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bellamkonda:2009:ESO,
  author =       "Srikanth Bellamkonda and Rafi Ahmed and Andrew
                 Witkowski and Angela Amor and Mohamed Zait and
                 Chun-Chieh Lin",
  title =        "Enhanced subquery optimizations in {Oracle}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1366--1377",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kim:2009:SVH,
  author =       "Changkyu Kim and Tim Kaldewey and Victor W. Lee and
                 Eric Sedlar and Anthony D. Nguyen and Nadathur Satish
                 and Jatin Chhugani and Andrea {Di Blas} and Pradeep
                 Dubey",
  title =        "Sort vs. {Hash} revisited: fast join implementation on
                 modern multi-core {CPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1378--1389",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2009:EOJ,
  author =       "Yu Xu and Pekka Kostamaa",
  title =        "Efficient outer join data skew handling in parallel
                 {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1390--1396",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Friedman:2009:SMP,
  author =       "Eric Friedman and Peter Pawlowski and John
                 Cieslewicz",
  title =        "{SQL\slash MapReduce}: a practical approach to
                 self-describing, polymorphic, and parallelizable
                 user-defined functions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1402--1413",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gates:2009:BHL,
  author =       "Alan F. Gates and Olga Natkovich and Shubham Chopra
                 and Pradeep Kamath and Shravan M. Narayanamurthy and
                 Christopher Olston and Benjamin Reed and Santhosh
                 Srinivasan and Utkarsh Srivastava",
  title =        "Building a high-level dataflow system on top of
                 {Map-Reduce}: the {Pig} experience",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1414--1425",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Panda:2009:PMP,
  author =       "Biswanath Panda and Joshua S. Herbach and Sugato Basu
                 and Roberto J. Bayardo",
  title =        "{PLANET}: massively parallel learning of tree
                 ensembles with {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1426--1437",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Legler:2009:RDT,
  author =       "Thomas Legler and Wolfgang Lehner and Jan Schaffner
                 and Jens Kr{\"u}ger",
  title =        "Robust and distributed top-n frequent-pattern mining
                 with {SAP BW} accelerator",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1438--1449",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dieu:2009:TUF,
  author =       "Nicolas Dieu and Adrian Dragusanu and Fran{\c{c}}oise
                 Fabret and Fran{\c{c}}ois Llirbat and Eric Simon",
  title =        "1,000 tables under the form",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1450--1461",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bhattacharjee:2009:EIC,
  author =       "Bishwaranjan Bhattacharjee and Lipyeow Lim and Timothy
                 Malkemus and George Mihaila and Kenneth Ross and
                 Sherman Lau and Cathy McArthur and Zoltan Toth and Reza
                 Sherkat",
  title =        "Efficient index compression in {DB2 LUW}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1462--1473",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lacroix:2009:SSW,
  author =       "Zo{\'e} Lacroix and Christophe Legendre and Spyro
                 Mousses",
  title =        "Storing scientific workflows in a database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1474--1480",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cohen:2009:MSN,
  author =       "Jeffrey Cohen and Brian Dolan and Mark Dunlap and
                 Joseph M. Hellerstein and Caleb Welton",
  title =        "{MAD} skills: new analysis practices for big data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1481--1492",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ley:2009:DSL,
  author =       "Michael Ley",
  title =        "{DBLP}: some lessons learned",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1493--1500",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mukherjee:2009:OSP,
  author =       "Niloy Mukherjee and Amit Ganesh and Vinayagam
                 Djegaradjane and Sujatha Muthulingam and Wei Zhang and
                 Krishna Kunchithapadam and Scott Lynn and Bharath Aleti
                 and Kam Shergill and Shaoyu Wang",
  title =        "{Oracle SecureFiles}: prepared for the digital
                 deluge",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1501--1511",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Baumgartner:2009:SWD,
  author =       "Robert Baumgartner and Georg Gottlob and Marcus
                 Herzog",
  title =        "Scalable {Web} data extraction for online market
                 intelligence",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1512--1523",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rajaraman:2009:KHP,
  author =       "Anand Rajaraman",
  title =        "{Kosmix}: high-performance topic exploration using the
                 deep {Web}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1524--1529",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nehme:2009:QMM,
  author =       "Rimma V. Nehme and Karen E. Works and Elke A.
                 Rundensteiner and Elisa Bertino",
  title =        "Query mesh: multi-route query processing technology",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1530--1533",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cudre-Mauroux:2009:DSS,
  author =       "P. Cudr{\'e}-Mauroux and H. Kimura and K.-T. Lim and J.
                 Rogers and R. Simakov and E. Soroush and P. Velikhov
                 and D. L. Wang and M. Balazinska and J. Becla and D.
                 DeWitt and B. Heath and D. Maier and S. Madden and J.
                 Patel and M. Stonebraker and S. Zdonik",
  title =        "A demonstration of {SciDB}: a science-oriented
                 {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1534--1537",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2009:MMM,
  author =       "Kuien Liu and Ke Deng and Zhiming Ding and Mingshu Li
                 and Xiaofang Zhou",
  title =        "{MOIR\slash MT}: monitoring large-scale road network
                 traffic in real-time",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1538--1541",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Colle:2009:ODR,
  author =       "Romain Colle and Leonidas Galanis and Supiti
                 Buranawatanachoke and Stratos Papadomanolakis and Yujun
                 Wang",
  title =        "{Oracle Database Replay}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1542--1545",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Borisov:2009:DPD,
  author =       "Nedyalko Borisov and Shivnath Babu and Sandeep
                 Uttamchandani and Ramani Routray and Aameek Singh",
  title =        "{DIADS}: a problem diagnosis tool for databases and
                 storage area networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1546--1549",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Herschel:2009:ASA,
  author =       "Melanie Herschel and Mauricio A. Hern{\'a}ndez and
                 Wang-Chiew Tan",
  title =        "{Artemis}: a system for analyzing missing answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1550--1553",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2009:DTS,
  author =       "Eugene Wu and Philippe Cudr{\'e}-Mauroux and Samuel
                 Madden",
  title =        "Demonstration of the {TrajStore} system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1554--1557",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ali:2009:MCS,
  author =       "M. H. Ali and C. Gerea and B. S. Raman and B. Sezgin
                 and T. Tarnavski and T. Verona and P. Wang and P.
                 Zabback and A. Ananthanarayan and A. Kirilov and M. Lu
                 and A. Raizman and R. Krishnan and R. Schindlauer and
                 T. Grabs and S. Bjeletich and B. Chandramouli and J.
                 Goldstein and S. Bhat and Ying Li and V. {Di Nicola}
                 and X. Wang and David Maier and S. Grell and O. Nano
                 and I. Santos",
  title =        "{Microsoft CEP Server} and online behavioral
                 targeting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1558--1561",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Krompass:2009:TMD,
  author =       "Stefan Krompass and Harumi Kuno and Janet L. Wiener
                 and Kevin Wilkinson and Umeshwar Dayal and Alfons
                 Kemper",
  title =        "A testbed for managing dynamic mixed workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1562--1565",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ahmad:2009:DSC,
  author =       "Yanif Ahmad and Christoph Koch",
  title =        "{DBToaster}: a {SQL} compiler for high-performance
                 delta processing in main-memory databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1566--1569",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Preda:2009:AAK,
  author =       "Nicoleta Preda and Fabian M. Suchanek and Gjergji
                 Kasneci and Thomas Neumann and Maya Ramanath and
                 Gerhard Weikum",
  title =        "{ANGIE}: active knowledge for interactive
                 exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1570--1573",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kopcke:2009:CEE,
  author =       "Hanna K{\"o}pcke and Andreas Thor and Erhard Rahm",
  title =        "Comparative evaluation of entity resolution approaches
                 with {FEVER}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1574--1577",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Brauer:2009:RDR,
  author =       "Falk Brauer and Wojciech Barczynski and Gregor
                 Hackenbroich and Marcus Schramm and Adrian Mocan and
                 Felix F{\"o}rster",
  title =        "{RankIE}: document retrieval on ranked entity graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1578--1581",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mecca:2009:CEM,
  author =       "Giansalvatore Mecca and Paolo Papotti and Salvatore
                 Raunich and Marcello Buoncristiano",
  title =        "Concise and expressive mappings with {+Spicy}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1582--1585",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cruz:2009:AEM,
  author =       "Isabel F. Cruz and Flavio Palandri Antonelli and
                 Cosmin Stroe",
  title =        "{AgreementMaker}: efficient matching for large
                 real-world schemas and ontologies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1586--1589",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hassanzadeh:2009:LQW,
  author =       "Oktie Hassanzadeh and Reynold Xin and Ren{\'e}e J.
                 Miller and Anastasios Kementsietsidis and Lipyeow Lim
                 and Min Wang",
  title =        "{Linkage Query Writer}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1590--1593",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2009:SEE,
  author =       "Xiaoyuan Wang and Xingzhi Sun and Feng Cao and Li Ma
                 and Nick Kanellos and Kang Zhang and Yue Pan and Yong
                 Yu",
  title =        "{SMDM}: enhancing enterprise-wide master data
                 management using semantic {Web} technologies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1594--1597",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gubanov:2009:IUR,
  author =       "Michael N. Gubanov and Lucian Popa and Howard Ho and
                 Hamid Pirahesh and Jeng-Yih Chang and Shr-Chang Chen",
  title =        "{IBM UFO} repository: object-oriented data
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1598--1601",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2009:MSW,
  author =       "Huajun Chen and Bin Lu and Yuan Ni and Guotong Xie and
                 Chunying Zhou and Jinhua Mi and Zhaohui Wu",
  title =        "Mashup by surfing a {Web} of data {APIs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1602--1605",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pichler:2009:DDE,
  author =       "Reinhard Pichler and Vadim Savenkov",
  title =        "{DEMo}: data exchange modeling tool",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1606--1609",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Letchner:2009:LDW,
  author =       "Julie Letchner and Christopher R{\'e} and Magdalena
                 Balazinska and Matthai Philipose",
  title =        "Lahar demonstration: warehousing {Markovian} streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1610--1613",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2009:WAC,
  author =       "Peng Sun and Ziyang Liu and Sivaramakrishnan Natarajan
                 and Susan B. Davidson and Yi Chen",
  title =        "{WOLVES}: achieving correct provenance analysis by
                 detecting and resolving unsound workflow views",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1614--1617",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dai:2009:TTI,
  author =       "Chenyun Dai and Gabriel Ghinita and Elisa Bertino and
                 Ji-Won Byun and Ninghui Li",
  title =        "{TIAMAT}: a tool for interactive analysis of microdata
                 anonymization techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1618--1621",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yu:2009:IIN,
  author =       "Yintao Yu and Cindy X. Lin and Yizhou Sun and Chen
                 Chen and Jiawei Han and Binbin Liao and Tianyi Wu and
                 ChengXiang Zhai and Duo Zhang and Bo Zhao",
  title =        "{iNextCube}: information network-enhanced text cube",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1622--1625",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Thusoo:2009:HWS,
  author =       "Ashish Thusoo and Joydeep Sen Sarma and Namit Jain and
                 Zheng Shao and Prasad Chakka and Suresh Anthony and Hao
                 Liu and Pete Wyckoff and Raghotham Murthy",
  title =        "{Hive}: a warehousing solution over a map-reduce
                 framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1626--1629",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Satish:2009:TEB,
  author =       "Arjun Satish and Ramesh Jain and Amarnath Gupta",
  title =        "{Tolkien}: an event based storytelling system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1630--1633",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sarigol:2009:ESN,
  author =       "Emre Sarig{\"o}l and Oriana Riva and Patrick Stuedi
                 and Gustavo Alonso",
  title =        "Enabling social networking in ad hoc networks of
                 mobile phones",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1634--1637",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bao:2009:PVD,
  author =       "Zhuowei Bao and Sarah Cohen-Boulakia and Susan B.
                 Davidson and Pierrick Girard",
  title =        "{PDiffView}: viewing the difference in provenance of
                 workflow results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1638--1641",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2009:GOW,
  author =       "Daniel Deutch and Tova Milo and Tom Yam",
  title =        "Goal-oriented {Web}-site navigation for on-line
                 shoppers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1642--1645",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pereira:2009:AWQ,
  author =       "Fernando Pereira and Anand Rajaraman and Sunita
                 Sarawagi and William Tunstall-Pedoe and Gerhard Weikum
                 and Alon Halevy",
  title =        "Answering {Web} questions using structured data: dream
                 or reality?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1646--1646",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bernstein:2009:HBB,
  author =       "Philip A. Bernstein and Daniel J. Abadi and Michael J.
                 Cafarella and Joseph M. Hellerstein and Donald Kossmann
                 and Samuel Madden",
  title =        "How best to build {Web}-scale data managers?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1647--1647",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Manegold:2009:DAE,
  author =       "Stefan Manegold and Martin L. Kersten and Peter
                 Boncz",
  title =        "Database architecture evolution: mammals flourished
                 long before dinosaurs became extinct",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1648--1653",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2009:DFR,
  author =       "Xin Luna Dong and Felix Naumann",
  title =        "Data fusion: resolving data conflicts for
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1654--1655",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Heer:2009:DVS,
  author =       "Jeffrey Heer and Joseph M. Hellerstein",
  title =        "Data visualization and social data analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1656--1657",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chaudhuri:2009:KQR,
  author =       "Surajit Chaudhuri and Gautam Das",
  title =        "Keyword querying and ranking in databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1658--1659",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hadjieleftheriou:2009:EAS,
  author =       "Marios Hadjieleftheriou and Chen Li",
  title =        "Efficient approximate search on string collections",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1660--1661",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Srivastava:2009:ITD,
  author =       "Divesh Srivastava and Suresh Venkatasubramanian",
  title =        "Information theory for data management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1662--1663",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abadi:2009:COD,
  author =       "Daniel J. Abadi and Peter A. Boncz and Stavros
                 Harizopoulos",
  title =        "Column-oriented database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "2",
  number =       "2",
  pages =        "1664--1665",
  month =        aug,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:54:57 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Srivastava:2010:ERT,
  author =       "Divesh Srivastava and Lukasz Golab and Rick Greer and
                 Theodore Johnson and Joseph Seidel and Vladislav
                 Shkapenyuk and Oliver Spatscheck and Jennifer Yates",
  title =        "Enabling real time data analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1--2",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Matsudaira:2010:HEB,
  author =       "Paul Matsudaira",
  title =        "High-end biological imaging generates very large
                 {$3$D+} and dynamic datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "3--3",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cho:2010:DWD,
  author =       "Junghoo Cho and Hector Garcia-Molina",
  title =        "Dealing with {Web} data: history and look ahead",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "4--4",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
  remark =       "10-year best paper award",
}

@Article{Kemme:2010:DRT,
  author =       "Bettina Kemme and Gustavo Alonso",
  title =        "Database replication: a tale of research across
                 communities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "5--12",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
  remark =       "10-year best paper award",
}

@Article{Canim:2010:BDR,
  author =       "Mustafa Canim and Murat Kantarc{\i}o{\u{g}}lu and Bijit
                 Hore and Sharad Mehrotra",
  title =        "Building disclosure risk aware query optimizers for
                 relational databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "13--24",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Allard:2010:SPD,
  author =       "Tristan Allard and Nicolas Anciaux and Luc Bouganim
                 and Yanli Guo and Lionel Le Folgoc and Benjamin Nguyen
                 and Philippe Pucheral and Indrajit Ray and Indrakshi
                 Ray and Shaoyi Yin",
  title =        "Secure personal data servers: a vision paper",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "25--35",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fabbri:2010:PMR,
  author =       "Daniel Fabbri and Kristen LeFevre and Qiang Zhu",
  title =        "{PolicyReplay}: misconfiguration-response queries for
                 data breach reporting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "36--47",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Curino:2010:SWD,
  author =       "Carlo Curino and Evan Jones and Yang Zhang and Sam
                 Madden",
  title =        "{Schism}: a workload-driven approach to database
                 replication and partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "48--57",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qin:2010:TTS,
  author =       "Lu Qin and Jeffrey Xu Yu and Lijun Chang",
  title =        "Ten thousand {SQLs}: parallel keyword queries
                 computing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "58--69",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Thomson:2010:CDD,
  author =       "Alexander Thomson and Daniel J. Abadi",
  title =        "The case for determinism in database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "70--80",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alexe:2010:MCI,
  author =       "Bogdan Alexe and Mauricio Hern{\'a}ndez and Lucian
                 Popa and Wang-Chiew Tan",
  title =        "{MapMerge}: correlating independent schema mappings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "81--92",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Greco:2010:CTC,
  author =       "Sergio Greco and Francesca Spezzano",
  title =        "Chase termination: a constraints rewriting approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "93--104",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Marnette:2010:SDE,
  author =       "Bruno Marnette and Giansalvatore Mecca and Paolo
                 Papotti",
  title =        "Scalable data exchange with functional dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "105--116",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kanza:2010:IRS,
  author =       "Yaron Kanza and Roy Levin and Eliyahu Safra and
                 Yehoshua Sagiv",
  title =        "Interactive route search in the presence of order
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "117--128",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lang:2010:EMM,
  author =       "Willis Lang and Jignesh M. Patel",
  title =        "Energy management for {MapReduce} clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "129--139",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Baid:2010:TSK,
  author =       "Akanksha Baid and Ian Rae and Jiexing Li and AnHai
                 Doan and Jeffrey Naughton",
  title =        "Toward scalable keyword search over relational data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "140--149",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mozafari:2010:REN,
  author =       "Barzan Mozafari and Kai Zeng and Carlo Zaniolo",
  title =        "From regular expressions to nested words: unifying
                 languages and query execution for relational and {XML}
                 sequences",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "150--161",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Grust:2010:ASL,
  author =       "Torsten Grust and Jan Rittinger and Tom Schreiber",
  title =        "Avalanche-safe {LINQ} compilation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "162--172",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2010:TCF,
  author =       "Wenfei Fan and Jianzhong Li and Shuai Ma and Nan Tang
                 and Wenyuan Yu",
  title =        "Towards certain fixes with editing rules and master
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "173--184",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Herschel:2010:EMA,
  author =       "Melanie Herschel and Mauricio A. Hern{\'a}ndez",
  title =        "Explaining missing answers to {SPJUA} queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "185--196",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Beskales:2010:SRF,
  author =       "George Beskales and Ihab F. Ilyas and Lukasz Golab",
  title =        "Sampling the repairs of functional dependency
                 violations under hard constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "197--207",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Menestrina:2010:EER,
  author =       "David Menestrina and Steven Euijong Whang and Hector
                 Garcia-Molina",
  title =        "Evaluating entity resolution results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "208--219",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2010:HPD,
  author =       "Badrish Chandramouli and Jonathan Goldstein and David
                 Maier",
  title =        "High-performance dynamic pattern matching over
                 disordered streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "220--231",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Botan:2010:SMA,
  author =       "Irina Botan and Roozbeh Derakhshan and Nihal Dindar
                 and Laura Haas and Ren{\'e}e J. Miller and Nesime
                 Tatbul",
  title =        "{SECRET}: a model for analysis of the execution
                 semantics of stream processing systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "232--243",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2010:RPS,
  author =       "Haopeng Zhang and Yanlei Diao and Neil Immerman",
  title =        "Recognizing patterns in streams with imprecise
                 timestamps",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "244--255",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Neumann:2010:XRF,
  author =       "Thomas Neumann and Gerhard Weikum",
  title =        "{x-RDF-3X}: fast querying, high update rates, and
                 consistency for {RDF} databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "256--263",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2010:GPM,
  author =       "Wenfei Fan and Jianzhong Li and Shuai Ma and Nan Tang
                 and Yinghui Wu and Yunpeng Wu",
  title =        "Graph pattern matching: from intractable to polynomial
                 time",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "264--275",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yildirim:2010:GSR,
  author =       "Hilmi Yildirim and Vineet Chaoji and Mohammed J.
                 Zaki",
  title =        "{GRAIL}: scalable reachability index for large
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "276--284",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bu:2010:HEI,
  author =       "Yingyi Bu and Bill Howe and Magdalena Balazinska and
                 Michael D. Ernst",
  title =        "{HaLoop}: efficient iterative data processing on large
                 clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "285--296",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Benedikt:2010:IVV,
  author =       "Michael Benedikt and Georg Gottlob",
  title =        "The impact of virtual views on containment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "297--308",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Terwilliger:2010:UET,
  author =       "James F. Terwilliger and Lois M. L. Delcambre and
                 David Maier and Jeremy Steinhauer and Scott Britell",
  title =        "Updatable and evolvable transforms for virtual
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "309--319",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2010:NCM,
  author =       "Daniel Deutch and Ohad Greenshpan and Tova Milo",
  title =        "Navigating in complex mashed-up applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "320--329",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Melnik:2010:DIA,
  author =       "Sergey Melnik and Andrey Gubarev and Jing Jing Long
                 and Geoffrey Romer and Shiva Shivakumar and Matt Tolton
                 and Theo Vassilakis",
  title =        "{Dremel}: interactive analysis of {Web}-scale
                 datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "330--339",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhao:2010:GQO,
  author =       "Peixiang Zhao and Jiawei Han",
  title =        "On graph query optimization in large networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "340--351",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Martinenghi:2010:PRJ,
  author =       "Davide Martinenghi and Marco Tagliasacchi",
  title =        "Proximity rank join",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "352--363",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vlachou:2010:IMI,
  author =       "Akrivi Vlachou and Christos Doulkeridis and Kjetil
                 N{\o}rv{\aa}g and Yannis Kotidis",
  title =        "Identifying the most influential data objects with
                 reverse top-$k$ queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "364--372",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2010:RTP,
  author =       "Xin Cao and Gao Cong and Christian S. Jensen",
  title =        "Retrieving top-$k$ prestige-based relevant spatial
                 {Web} objects",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "373--384",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2010:PLF,
  author =       "Lei Li and B. Aditya Prakash and Christos Faloutsos",
  title =        "Parsimonious linear fingerprinting for time series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "385--396",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2010:HTM,
  author =       "Rui Zhang and Martin Stradling",
  title =        "The {HV-tree}: a memory hierarchy aware version
                 index",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "397--408",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pramanik:2010:TRQ,
  author =       "Sakti Pramanik and Alok Watve and Chad R. Meiners and
                 Alex Liu",
  title =        "Transforming range queries to equivalent box queries
                 to optimize page access",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "409--416",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guo:2010:RLU,
  author =       "Songtao Guo and Xin Luna Dong and Divesh Srivastava
                 and Remi Zajac",
  title =        "Record linkage with uniqueness constraints and
                 erroneous values",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "417--428",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ioannou:2010:FEA,
  author =       "Ekaterini Ioannou and Wolfgang Nejdl and Claudia
                 Nieder{\'e}e and Yannis Velegrakis",
  title =        "On-the-fly entity-aware query processing in the
                 presence of linkage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "429--438",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yakout:2010:BBR,
  author =       "Mohamed Yakout and Ahmed K. Elmagarmid and Hazem
                 Elmeleegy and Mourad Ouzzani and Alan Qi",
  title =        "Behavior based record linkage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "439--448",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2010:IFC,
  author =       "Wook-Shin Han and Jinsoo Lee and Minh-Duc Pham and
                 Jeffrey Xu Yu",
  title =        "{iGraph}: a framework for comparisons of disk-based
                 graph indexing techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "449--459",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schad:2010:RMC,
  author =       "J{\"o}rg Schad and Jens Dittrich and Jorge-Arnulfo
                 Quian{\'e}-Ruiz",
  title =        "Runtime measurements in the cloud: observing,
                 analyzing, and reducing variance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "460--471",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2010:PMD,
  author =       "Dawei Jiang and Beng Chin Ooi and Lei Shi and Sai Wu",
  title =        "The performance of {MapReduce}: an in-depth study",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "472--483",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kopcke:2010:EER,
  author =       "Hanna K{\"o}pcke and Andreas Thor and Erhard Rahm",
  title =        "Evaluation of entity resolution approaches on
                 real-world match problems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "484--493",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nykiel:2010:MSA,
  author =       "Tomasz Nykiel and Michalis Potamias and Chaitanya
                 Mishra and George Kollios and Nick Koudas",
  title =        "{MRShare}: sharing across multiple queries in
                 {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "494--505",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vo:2010:TET,
  author =       "Hoang Tam Vo and Chun Chen and Beng Chin Ooi",
  title =        "Towards elastic transactional cloud storage with range
                 query support",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "506--514",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dittrich:2010:HMY,
  author =       "Jens Dittrich and Jorge-Arnulfo Quian{\'e}-Ruiz and
                 Alekh Jindal and Yagiz Kargin and Vinay Setty and
                 J{\"o}rg Schad",
  title =        "{Hadoop++}: making a yellow elephant run like a
                 cheetah (without it even noticing)",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "515--529",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bruno:2010:SLR,
  author =       "Nicolas Bruno and Vivek Narasayya and Ravi
                 Ramamurthy",
  title =        "Slicing long-running queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "530--541",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tzoumas:2010:SAH,
  author =       "Kostas Tzoumas and Amol Deshpande and Christian S.
                 Jensen",
  title =        "Sharing-aware horizontal partitioning for exploiting
                 correlations during query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "542--553",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cali:2010:APO,
  author =       "Andrea Cal{\`\i} and Georg Gottlob and Andreas
                 Pieris",
  title =        "Advanced processing for ontological queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "554--565",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Parameswaran:2010:TWC,
  author =       "Aditya Parameswaran and Hector Garcia-Molina and Anand
                 Rajaraman",
  title =        "Towards the {Web} of concepts: extracting concepts
                 from large datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "566--577",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gulhane:2010:ECR,
  author =       "Pankaj Gulhane and Rajeev Rastogi and Srinivasan H.
                 Sengamedu and Ashwin Tengli",
  title =        "Exploiting content redundancy for {Web} information
                 extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "578--587",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2010:ARR,
  author =       "Bin Liu and Laura Chiticariu and Vivian Chu and H. V.
                 Jagadish and Frederick R. Reiss",
  title =        "Automatic rule refinement for information extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "588--597",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pang:2010:ETS,
  author =       "HweeHwa Pang and Xuhua Ding and Xiaokui Xiao",
  title =        "Embellishing text search queries to protect user
                 privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "598--607",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chaytor:2010:SDR,
  author =       "Rhonda Chaytor and Ke Wang",
  title =        "Small domain randomization: same privacy, more
                 utility",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "608--618",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papadopoulos:2010:NNS,
  author =       "Stavros Papadopoulos and Spiridon Bakiras and Dimitris
                 Papadias",
  title =        "Nearest neighbor search with strong location privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "619--629",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kimura:2010:UPI,
  author =       "Hideaki Kimura and Samuel Madden and Stanley B.
                 Zdonik",
  title =        "{UPI}: a primary index for uncertain databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "630--637",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2010:RCP,
  author =       "Jian Li and Amol Deshpande",
  title =        "Ranking continuous probabilistic datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "638--649",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lian:2010:SSJ,
  author =       "Xiang Lian and Lei Chen",
  title =        "Set similarity join on probabilistic data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "650--659",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Woods:2010:CED,
  author =       "Louis Woods and Jens Teubner and Gustavo Alonso",
  title =        "Complex event detection at wire speed with {FPGAs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "660--669",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2010:DCG,
  author =       "Wenbin Fang and Bingsheng He and Qiong Luo",
  title =        "Database compression on graphics processors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "670--680",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Johnson:2010:ASA,
  author =       "Ryan Johnson and Ippokratis Pandis and Radu Stoica and
                 Manos Athanassoulis and Anastasia Ailamaki",
  title =        "{Aether}: a scalable approach to logging",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "681--692",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Macropol:2010:SDB,
  author =       "Kathy Macropol and Ambuj Singh",
  title =        "Scalable discovery of best clusters on large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "693--702",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Smola:2010:APT,
  author =       "Alexander Smola and Shravan Narayanamurthy",
  title =        "An architecture for parallel topic models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "703--710",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ganti:2010:KFI,
  author =       "Venkatesh Ganti and Yeye He and Dong Xin",
  title =        "{Keyword++}: a framework to improve keyword search
                 over entity databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "711--722",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2010:SMR,
  author =       "Zhenhui Li and Bolin Ding and Jiawei Han and Roland
                 Kays",
  title =        "{Swarm}: mining relaxed temporal moving object
                 clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "723--734",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2010:AUP,
  author =       "Su Chen and Beng Chin Ooi and Zhenjie Zhang",
  title =        "An adaptive updating protocol for reducing moving
                 object database workload",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "735--746",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kellaris:2010:SPC,
  author =       "Georgios Kellaris and Kyriakos Mouratidis",
  title =        "Shortest path computation on air indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "747--757",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2010:EES,
  author =       "Jia Xu and Zhenjie Zhang and Anthony K. H. Tung and Ge
                 Yu",
  title =        "Efficient and effective similarity search over
                 probabilistic data based on {Earth Mover's Distance}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "758--769",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Benedikt:2010:PXM,
  author =       "Michael Benedikt and Evgeny Kharlamov and Dan Olteanu
                 and Pierre Senellart",
  title =        "Probabilistic {XML} via {Markov Chains}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "770--781",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arumugam:2010:MRR,
  author =       "Subi Arumugam and Fei Xu and Ravi Jampani and
                 Christopher Jermaine and Luis L. Perez and Peter J.
                 Haas",
  title =        "{MCDB-R}: risk analysis in the database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "782--793",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wick:2010:SPD,
  author =       "Michael Wick and Andrew McCallum and Gerome Miklau",
  title =        "Scalable probabilistic databases with factor graphs
                 and {MCMC}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "794--804",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2010:MCF,
  author =       "Meihui Zhang and Marios Hadjieleftheriou and Beng Chin
                 Ooi and Cecilia M. Procopiuc and Divesh Srivastava",
  title =        "On multi-column foreign key discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "805--814",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cheng:2010:EEE,
  author =       "Reynold Cheng and Eric Lo and Xuan S. Yang and
                 Ming-Hay Luk and Xiang Li and Xike Xie",
  title =        "Explore or exploit?: effective strategies for
                 disambiguating large databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "815--825",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Soliman:2010:BRM,
  author =       "Mohamed A. Soliman and Ihab F. Ilyas and Mina Saleeb",
  title =        "Building ranked mashups of unstructured sources with
                 uncertain information",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "826--837",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Raissi:2010:CCS,
  author =       "Chedy Ra{\"\i}ssi and Jian Pei and Thomas Kister",
  title =        "Computing closed skycubes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "838--847",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lo:2010:GDQ,
  author =       "Eric Lo and Nick Cheng and Wing-Kai Hon",
  title =        "Generating databases for query workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "848--859",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2010:PTJ,
  author =       "Minji Wu and Laure Berti-{\'E}quille and Am{\'e}lie
                 Marian and Cecilia M. Procopiuc and Divesh Srivastava",
  title =        "Processing top-$k$ join queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "860--870",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Martinez-Palau:2010:TWR,
  author =       "Xavier Martinez-Palau and David Dominguez-Sal and
                 Josep Lluis Larriba-Pey",
  title =        "Two-way replacement selection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "871--881",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Maneth:2010:XWQ,
  author =       "Sebastian Maneth and Kim Nguyen",
  title =        "{XPath} whole query optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "882--893",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Grimsmo:2010:FOT,
  author =       "Nils Grimsmo and Truls A. Bj{\o}rklund and Magnus Lie
                 Hetland",
  title =        "Fast optimal twig joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "894--905",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Benedikt:2010:DIX,
  author =       "Michael Benedikt and James Cheney",
  title =        "Destabilizers and independence of {XML} updates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "906--917",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2010:SWH,
  author =       "Ziyang Liu and Qihong Shao and Yi Chen",
  title =        "Searching workflows with hierarchical views",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "918--927",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pandis:2010:DOT,
  author =       "Ippokratis Pandis and Ryan Johnson and Nikos
                 Hardavellas and Anastasia Ailamaki",
  title =        "Data-oriented transaction execution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "928--939",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2010:OTQ,
  author =       "Daniel Deutch and Tova Milo and Neoklis Polyzotis and
                 Tom Yam",
  title =        "Optimal top-$k$ query evaluation for weighted business
                 processes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "940--951",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2010:BSM,
  author =       "Guozhang Wang and Marcos Vaz Salles and Benjamin
                 Sowell and Xun Wang and Tuan Cao and Alan Demers and
                 Johannes Gehrke and Walker White",
  title =        "Behavioral simulations in {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "952--963",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ge:2010:TSS,
  author =       "Tingjian Ge and Stan Zdonik",
  title =        "{A*-tree}: a structure for storage and modeling of
                 uncertain multidimensional arrays",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "964--974",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aggarwal:2010:DPM,
  author =       "Charu C. Aggarwal and Yao Li and Philip S. Yu and
                 Ruoming Jin",
  title =        "On dense pattern mining in graph streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "975--984",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yiu:2010:EPD,
  author =       "Man Lung Yiu and Leong Hou U. and Simonas
                 {\v{S}}altenis and Kostas Tzoumas",
  title =        "Efficient proximity detection among mobile users via
                 self-tuning policies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "985--996",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Potamias:2010:KNN,
  author =       "Michalis Potamias and Francesco Bonchi and Aristides
                 Gionis and George Kollios",
  title =        "$k$-nearest neighbors in uncertain graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "997--1008",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2010:MSS,
  author =       "Xin Cao and Gao Cong and Christian S. Jensen",
  title =        "Mining significant semantic locations from {GPS}
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1009--1020",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hay:2010:BAD,
  author =       "Michael Hay and Vibhor Rastogi and Gerome Miklau and
                 Dan Suciu",
  title =        "Boosting the accuracy of differentially private
                 histograms through consistency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1021--1032",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2010:UIP,
  author =       "Jianneng Cao and Panagiotis Karras and Chedy
                 Ra{\"\i}ssi and Kian-Lee Tan",
  title =        "$ \rho $-uncertainty: inference-proof transaction
                 anonymization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1033--1044",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cormode:2010:MMM,
  author =       "Graham Cormode and Divesh Srivastava and Ninghui Li
                 and Tiancheng Li",
  title =        "Minimizing minimality and maximizing utility:
                 analyzing method-based attacks on anonymized data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1045--1056",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2010:QPI,
  author =       "Daisy Zhe Wang and Michael J. Franklin and Minos
                 Garofalakis and Joseph M. Hellerstein",
  title =        "Querying probabilistic information extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1057--1067",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sen:2010:ROF,
  author =       "Prithviraj Sen and Amol Deshpande and Lise Getoor",
  title =        "Read-once functions and query evaluation in
                 probabilistic databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1068--1079",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agrawal:2010:FUD,
  author =       "Parag Agrawal and Anish Das Sarma and Jeffrey Ullman
                 and Jennifer Widom",
  title =        "Foundations of uncertain-data integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1080--1090",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mathioudakis:2010:IAD,
  author =       "Michael Mathioudakis and Nilesh Bansal and Nick
                 Koudas",
  title =        "Identifying, attributing and describing spatial
                 bursts",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1091--1102",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kimura:2010:CCA,
  author =       "Hideaki Kimura and George Huo and Alexander Rasin and
                 Samuel Madden and Stanley B. Zdonik",
  title =        "{CORADD}: correlation aware database designer for
                 materialized views and indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1103--1113",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nanongkai:2010:RMR,
  author =       "Danupon Nanongkai and Atish Das Sarma and Ashwin Lall
                 and Richard J. Lipton and Jun Xu",
  title =        "Regret-minimizing representative databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1114--1124",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arai:2010:ACA,
  author =       "Benjamin Arai and Gautam Das and Dimitrios Gunopulos
                 and Vagelis Hristidis and Nick Koudas",
  title =        "An access cost-aware approach for object retrieval
                 over multiple sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1125--1136",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abhirama:2010:SPC,
  author =       "M. Abhirama and Sourjya Bhaumik and Atreyee Dey and
                 Harsh Shrimal and Jayant R. Haritsa",
  title =        "On the stability of plan costs and the costs of plan
                 stability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1137--1148",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Herodotou:2010:XST,
  author =       "Herodotos Herodotou and Shivnath Babu",
  title =        "{Xplus}: a {SQL}-tuning-aware query optimizer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1149--1160",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2010:GHR,
  author =       "Wenfei Fan and Jianzhong Li and Shuai Ma and Hongzhi
                 Wang and Yinghui Wu",
  title =        "Graph homomorphism revisited for graph matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1161--1172",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kandhan:2010:SFS,
  author =       "Ramakrishnan Kandhan and Nikhil Teletia and Jignesh M.
                 Patel",
  title =        "{SigMatch}: fast and scalable multi-pattern matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1173--1184",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2010:SSI,
  author =       "Shijie Zhang and Jiong Yang and Wei Jin",
  title =        "{SAPPER}: subgraph indexing and approximate matching
                 in large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1185--1194",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2010:TIS,
  author =       "Yinan Li and Bingsheng He and Robin Jun Yang and Qiong
                 Luo and Ke Yi",
  title =        "Tree indexing on solid state drives",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1195--1206",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2010:EBT,
  author =       "Sai Wu and Dawei Jiang and Beng Chin Ooi and Kun-Lung
                 Wu",
  title =        "Efficient {B-tree} based indexing for cloud data
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1207--1218",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2010:TJE,
  author =       "Jiannan Wang and Jianhua Feng and Guoliang Li",
  title =        "{Trie-join}: efficient trie-based string similarity
                 joins with edit-distance constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1219--1230",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sharifzadeh:2010:VTR,
  author =       "Mehdi Sharifzadeh and Cyrus Shahabi",
  title =        "{VoR-tree}: {R-trees} with {Voronoi} diagrams for
                 efficient processing of spatial nearest neighbor
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1231--1242",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deepak:2010:ERR,
  author =       "P. Deepak and Prasad M. Deshpande",
  title =        "Efficient {RkNN} retrieval with arbitrary non-metric
                 similarity measures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1243--1254",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2010:ESE,
  author =       "Shiming Zhang and Nikos Mamoulis and David W. Cheung
                 and Ben Kao",
  title =        "Efficient skyline evaluation over partially ordered
                 domains",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1255--1266",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wei:2010:AHO,
  author =       "Mingzhu Wei and Elke A. Rundensteiner and Murali
                 Mani",
  title =        "Achieving high output quality under limited resources
                 through structure-based spilling in {XML} streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1267--1278",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mihaylov:2010:DJO,
  author =       "Svilen R. Mihaylov and Marie Jacob and Zachary G. Ives
                 and Sudipto Guha",
  title =        "Dynamic join optimization in multi-hop wireless sensor
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1279--1290",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Akdere:2010:DSC,
  author =       "Mert Akdere and U{\u{g}}ur {\c{C}}etintemel and Eli
                 Upfal",
  title =        "Database-support for continuous prediction queries
                 over streaming data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1291--1301",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tran:2010:CAU,
  author =       "Thanh T. L. Tran and Andrew McGregor and Yanlei Diao
                 and Liping Peng and Anna Liu",
  title =        "Conditioning and aggregating uncertain data streams:
                 going beyond expectations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1302--1313",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Glavic:2010:TUB,
  author =       "Boris Glavic and Gustavo Alonso and Ren{\'e}e J.
                 Miller and Laura M. Haas",
  title =        "{TRAMP}: understanding the behavior of schema mappings
                 through provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1314--1325",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Whang:2010:ERE,
  author =       "Steven Euijong Whang and Hector Garcia-Molina",
  title =        "Entity resolution with evolving rules",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1326--1337",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Limaye:2010:ASW,
  author =       "Girija Limaye and Sunita Sarawagi and Soumen
                 Chakrabarti",
  title =        "Annotating and searching {Web} tables using entities,
                 types and relationships",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1338--1347",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bedathur:2010:IPM,
  author =       "Srikanta Bedathur and Klaus Berberich and Jens
                 Dittrich and Nikos Mamoulis and Gerhard Weikum",
  title =        "Interesting-phrase mining for ad-hoc text analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1348--1357",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2010:GDC,
  author =       "Xin Luna Dong and Laure Berti-Equille and Yifan Hu and
                 Divesh Srivastava",
  title =        "Global detection of complex copying relationships
                 between sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1358--1369",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{DeCapitanidiVimercati:2010:FLA,
  author =       "Sabrina {De Capitani di Vimercati} and Sara Foresti
                 and Sushil Jajodia and Stefano Paraboschi and
                 Pierangela Samarati",
  title =        "Fragments and loose associations: respecting privacy
                 in data publishing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1370--1381",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fusco:2010:NFF,
  author =       "Francesco Fusco and Marc Ph. Stoecklin and Michail
                 Vlachos",
  title =        "{NET-FLi}: on-the-fly compression, archiving and
                 indexing of streaming network traffic",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1382--1393",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zou:2010:SRQ,
  author =       "Qiong Zou and Huayong Wang and Robert Soul{\'e} and
                 Martin Hirzel and Henrique Andrade and Bu{\u{g}}ra
                 Gedik and Kun-Lung Wu",
  title =        "From a stream of relational queries to distributed
                 stream processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1394--1405",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mah:2010:UUA,
  author =       "James T. L. Mah and Danny C. C. Poo and Shaojiang
                 Cai",
  title =        "{UASMAs} (universal automated {SNP} mapping
                 algorithms): a set of algorithms to instantaneously map
                 {SNPs} in real time to aid functional {SNP} discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1406--1413",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Debnath:2010:FHT,
  author =       "Biplob Debnath and Sudipta Sengupta and Jin Li",
  title =        "{FlashStore}: high throughput persistent key--value
                 store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1414--1425",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xin:2010:MDA,
  author =       "Reynold S. Xin and William McLaren and Patrick
                 Dantressangle and Steve Schormann and Sam Lightstone
                 and Maria Schwenger",
  title =        "{MEET DB2}: automated database migration evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1426--1434",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Canim:2010:SBE,
  author =       "Mustafa Canim and George A. Mihaila and Bishwaranjan
                 Bhattacharjee and Kenneth A. Ross and Christian A.
                 Lang",
  title =        "{SSD} bufferpool extensions for database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1435--1446",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Loboz:2010:DWM,
  author =       "Charles Loboz and Slawek Smyl and Suman Nath",
  title =        "{DataGarage}: warehousing massive performance data on
                 commodity servers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1447--1458",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2010:CHP,
  author =       "Songting Chen",
  title =        "{Cheetah}: a high performance, custom data warehouse
                 on top of {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1459--1468",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Orair:2010:DBO,
  author =       "Gustavo H. Orair and Carlos H. C. Teixeira and Wagner
                 {Meira, Jr.} and Ye Wang and Srinivasan Parthasarathy",
  title =        "Distance-based outlier detection: consolidation and
                 renewed bearing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1469--1480",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kim:2010:ALM,
  author =       "Young-Seok Kim and Heegyu Jin and Kyoung-Gu Woo",
  title =        "Adaptive logging for mobile device",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1481--1492",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pesti:2010:RSL,
  author =       "Peter Pesti and Ling Liu and Bhuvan Bamba and Arun
                 Iyengar and Matt Weber",
  title =        "{RoadTrack}: scaling location updates for mobile
                 clients on road networks with query awareness",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1493--1504",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Si:2010:CID,
  author =       "Xiance Si and Edward Y. Chang and Zolt{\'a}n
                 Gy{\"o}ngyi and Maosong Sun",
  title =        "Confucius and its intelligent disciples: integrating
                 social with search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1505--1516",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Haritsa:2010:PDQ,
  author =       "Jayant R. Haritsa",
  title =        "The {Picasso} database query optimizer visualizer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1517--1520",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2010:CED,
  author =       "Ziyang Liu and Sivaramakrishnan Natarajan and Bin He
                 and Hui-I Hsiao and Yi Chen",
  title =        "{CODS}: evolving data efficiently and scalably in
                 column oriented databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1521--1524",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sadoghi:2010:EEP,
  author =       "Mohammad Sadoghi and Martin Labrecque and Harsh Singh
                 and Warren Shum and Hans-Arno Jacobsen",
  title =        "Efficient event processing through reconfigurable
                 hardware for algorithmic trading",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1525--1528",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Levandoski:2010:CCP,
  author =       "Justin J. Levandoski and Mohamed F. Mokbel and Mohamed
                 E. Khalefa",
  title =        "{CareDB}: a context and preference-aware
                 location-based database system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1529--1532",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kossmann:2010:CMC,
  author =       "Donald Kossmann and Tim Kraska and Simon Loesing and
                 Stephan Merkli and Raman Mittal and Flavio
                 Pfaffhauser",
  title =        "{Cloudy}: a modular cloud storage system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1533--1536",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kazemitabar:2010:GSQ,
  author =       "Seyed Jalal Kazemitabar and Ugur Demiryurek and
                 Mohamed Ali and Afsin Akdogan and Cyrus Shahabi",
  title =        "Geospatial stream query processing using {Microsoft
                 SQL Server StreamInsight}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1537--1540",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dyreson:2010:UXT,
  author =       "Curtis E. Dyreson and Sourav S. Bhowmick and
                 Kirankanth Mallampalli",
  title =        "Using {XMorph} to transform {XML} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1541--1544",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2010:ACE,
  author =       "Di Wang and Elke A. Rundensteiner and Han Wang and
                 Richard T. {Ellison III}",
  title =        "Active complex event processing: applications in
                 real-time health care",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1545--1548",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schreiber:2010:TNP,
  author =       "Tom Schreiber and Simone Bonetti and Torsten Grust and
                 Manuel Mayr and Jan Rittinger",
  title =        "Thirteen new players in the team: a {FERRY}-based
                 {LINQ} to {SQL} provider",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1549--1552",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abiteboul:2010:AEC,
  author =       "Serge Abiteboul and Pierre Bourhis and Bogdan Marinoiu
                 and Alban Galland",
  title =        "{AXART}: enabling collaborative work with {AXML}
                 artifacts",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1553--1556",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{McConnell:2010:IAF,
  author =       "Christopher McConnell and Fan Ping and Jeong-Hyon
                 Hwang",
  title =        "{iFlow}: an approach for fast and reliable
                 {Internet-scale} stream processing utilizing detouring
                 and replication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1557--1560",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kantere:2010:PCT,
  author =       "Verena Kantere and Maher Manoubi and Iluju Kiringa and
                 Timos Sellis and John Mylopoulos",
  title =        "Peer coordination through distributed triggers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1561--1564",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2010:SSY,
  author =       "Hao Wu and Guoliang Li and Chen Li and Lizhu Zhou",
  title =        "{Seaform}: search-as-you-type in forms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1565--1568",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Strotgen:2010:TSE,
  author =       "Jannik Str{\"o}tgen and Michael Gertz",
  title =        "{TimeTrails}: a system for exploring spatio-temporal
                 information in documents",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1569--1572",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pound:2010:QEF,
  author =       "Jeffrey Pound and Ihab F. Ilyas and Grant Weddell",
  title =        "{QUICK}: expressive and flexible search over knowledge
                 bases and text collections",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1573--1576",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kwietniewski:2010:TXD,
  author =       "Marcin Kwietniewski and Jarek Gryz and Stephanie
                 Hazlewood and Paul {Van Run}",
  title =        "Transforming {XML} documents as schemas evolve",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1577--1580",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2010:XCT,
  author =       "Ziyang Liu and Sivaramakrishnan Natarajan and Peng Sun
                 and Stephen Booher and Tim Meehan and Robert Winkler
                 and Yi Chen",
  title =        "{XSACT}: a comparison tool for structured search
                 results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1581--1584",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abdessalem:2010:OLT,
  author =       "Talel Abdessalem and Bogdan Cautis and Nora
                 Derouiche",
  title =        "{ObjectRunner}: lightweight, targeted extraction and
                 querying of structured {Web} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1585--1588",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elbassuoni:2010:RRW,
  author =       "Shady Elbassuoni and Katja Hose and Steffen Metzger
                 and Ralf Schenkel",
  title =        "{ROXXI}: {Reviving} witness {dOcuments} to {eXplore
                 eXtracted Information}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1589--1592",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Termehchy:2010:EUD,
  author =       "Arash Termehchy and Marianne Winslett",
  title =        "{EXTRUCT}: using deep structural information in {XML}
                 keyword search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1593--1596",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Akbarnejad:2010:SQR,
  author =       "Javad Akbarnejad and Gloria Chatzopoulou and Magdalini
                 Eirinaki and Suju Koshy and Sarika Mittal and Duc On
                 and Neoklis Polyzotis and Jothi S. Vindhiya Varman",
  title =        "{SQL QueRIE} recommendations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1597--1600",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ang:2010:PCM,
  author =       "Hock Hee Ang and Vivekanand Gopalkrishnan and Wee
                 Keong Ng and Steven C. H. Hoi",
  title =        "{P2PDocTagger}: content management through automated
                 {P2P} collaborative tagging",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1601--1604",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Setty:2010:IEI,
  author =       "Vinay Setty and Srikanta Bedathur and Klaus Berberich
                 and Gerhard Weikum",
  title =        "{InZeit}: efficiently identifying insightful time
                 points",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1605--1608",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2010:IIT,
  author =       "Aixin Sun and Sourav S. Bhowmick and Yao Liu",
  title =        "{iAVATAR}: an interactive tool for finding and
                 visualizing visual-representative tags in image
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1609--1612",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kabisch:2010:DWI,
  author =       "Thomas Kabisch and Eduard C. Dragut and Clement Yu and
                 Ulf Leser",
  title =        "Deep {Web} integration with {VisQI}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1613--1616",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2010:SST,
  author =       "Xin Luna Dong and Laure Berti-{\'E}quille and Yifan Hu and
                 Divesh Srivastava",
  title =        "{SOLOMON}: seeking the truth via copying detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1617--1620",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hentschel:2010:JTD,
  author =       "Martin Hentschel and Laura Haas and Ren{\'e}e J.
                 Miller",
  title =        "Just-in-time data integration in action",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1621--1624",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alexandrov:2010:MPD,
  author =       "Alexander Alexandrov and Max Heimel and Volker Markl
                 and Dominic Battr{\'e} and Fabian Hueske and Erik
                 Nijkamp and Stephan Ewen and Odej Kao and Daniel
                 Warneke",
  title =        "Massively parallel data analysis with {PACTs} on
                 {Nephele}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1625--1628",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Middelfart:2010:UST,
  author =       "Morten Middelfart and Torben Bach Pedersen",
  title =        "Using sentinel technology in the {TARGIT BI} suite",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1629--1632",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gunnemann:2010:CIC,
  author =       "Stephan G{\"u}nnemann and Ines F{\"a}rber and Hardy
                 Kremer and Thomas Seidl",
  title =        "{CoDA}: interactive cluster based concept discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1633--1636",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bergamaschi:2010:KSK,
  author =       "Sonia Bergamaschi and Elton Domnori and Francesco
                 Guerra and Mirko Orsini and Raquel {Trillo Lado} and
                 Yannis Velegrakis",
  title =        "{Keymantic}: semantic keyword-based searching in data
                 integration systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1637--1640",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Golab:2010:DAE,
  author =       "Lukasz Golab and Howard Karloff and Flip Korn and
                 Divesh Srivastava",
  title =        "Data {Auditor}: exploring data quality and semantics
                 using pattern tableaux",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1641--1644",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nori:2010:DCP,
  author =       "Anil K. Nori",
  title =        "Distributed caching platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1645--1646",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agrawal:2010:BDC,
  author =       "Divyakant Agrawal and Sudipto Das and Amr {El
                 Abbadi}",
  title =        "Big data and cloud computing: new wine or just new
                 bottles?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1647--1648",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Samet:2010:TSS,
  author =       "Hanan Samet",
  title =        "Techniques for similarity searching in multimedia
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1649--1650",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Etzion:2010:EPP,
  author =       "Opher Etzion",
  title =        "Event processing: past, present and future",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1651--1652",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Renz:2010:SSM,
  author =       "Matthias Renz and Reynold Cheng and Hans-Peter
                 Kriegel",
  title =        "Similarity search and mining in uncertain databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1653--1654",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Muthukrishnan:2010:DMM,
  author =       "S. Muthukrishnan",
  title =        "Data management and mining in {Internet} ad systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "3",
  number =       "1--2",
  pages =        "1655--1656",
  month =        sep,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:02 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kling:2010:GEE,
  author =       "Patrick Kling and M. Tamer {\"O}zsu and Khuzaima
                 Daudjee",
  title =        "Generating efficient execution plans for vertically
                 partitioned {XML} databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "1",
  pages =        "1--11",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lian:2010:GFH,
  author =       "Xiang Lian and Lei Chen",
  title =        "A generic framework for handling uncertain data with
                 local correlations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "1",
  pages =        "12--21",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khoussainova:2010:SCA,
  author =       "Nodira Khoussainova and YongChul Kwon and Magdalena
                 Balazinska and Dan Suciu",
  title =        "{SnipSuggest}: context-aware autocompletion for
                 {SQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "1",
  pages =        "22--33",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Meliou:2010:CCR,
  author =       "Alexandra Meliou and Wolfgang Gatterbauer and
                 Katherine F. Moore and Dan Suciu",
  title =        "The complexity of causality and responsibility for
                 query answers and non-answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "1",
  pages =        "34--45",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sagy:2010:DTQ,
  author =       "Guy Sagy and Daniel Keren and Izchak Sharfman and
                 Assaf Schuster",
  title =        "Distributed threshold querying of general functions by
                 a difference of monotonic representation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "46--57",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2010:TBD,
  author =       "Nan Wang and Jingbo Zhang and Kian-Lee Tan and Anthony
                 K. H. Tung",
  title =        "On triangulation-based dense neighborhood graph
                 discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "58--68",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rice:2010:GIR,
  author =       "Michael Rice and Vassilis J. Tsotras",
  title =        "Graph indexing of road networks for shortest path
                 queries with label restrictions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "69--80",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qian:2010:CUF,
  author =       "Li Qian and Kristen LeFevre and H. V. Jagadish",
  title =        "{CRIUS}: user-friendly database design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "81--92",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rocha-Junior:2010:EPT,
  author =       "Jo{\~a}o B. Rocha-Junior and Akrivi Vlachou and
                 Christos Doulkeridis and Kjetil N{\o}rv{\aa}g",
  title =        "Efficient processing of top-$k$ spatial preference
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "93--104",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Grund:2010:HMM,
  author =       "Martin Grund and Jens Kr{\"u}ger and Hasso Plattner
                 and Alexander Zeier and Philippe Cudr{\'e}-Mauroux and
                 Samuel Madden",
  title =        "{HYRISE}: a main memory hybrid storage engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "105--116",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Curino:2010:URI,
  author =       "Carlo A. Curino and Hyun Jin Moon and Alin Deutsch and
                 Carlo Zaniolo",
  title =        "Update rewriting and integrity constraint maintenance
                 in a schema evolution support system: {PRISM++}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "117--128",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Oro:2010:SEX,
  author =       "Ermelinda Oro and Massimo Ruffolo and Steffen Staab",
  title =        "{SXPath}: extending {XPath} towards spatial querying
                 on {Web} documents",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "129--140",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yuan:2010:PPP,
  author =       "Mingxuan Yuan and Lei Chen and Philip S. Yu",
  title =        "Personalized privacy protection in social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "2",
  pages =        "141--150",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:15 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Toda:2010:PAA,
  author =       "Guilherme A. Toda and Eli Cortez and Altigran S. da
                 Silva and Edleno de Moura",
  title =        "A probabilistic approach for automatically filling
                 form-based {Web} interfaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "3",
  pages =        "151--160",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:16 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papadimitriou:2010:OUB,
  author =       "Panagiotis Papadimitriou and Hector Garcia-Molina and
                 Ali Dasdan and Santanu Kolay",
  title =        "Output {URL} bidding",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "3",
  pages =        "161--172",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:16 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bahmani:2010:FIP,
  author =       "Bahman Bahmani and Abdur Chowdhury and Ashish Goel",
  title =        "Fast incremental and personalized {PageRank}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "3",
  pages =        "173--184",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:16 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we analyze the efficiency of Monte
                 Carlo methods for incremental computation of PageRank,
                 personalized PageRank, and similar random walk based
                 methods (with focus on SALSA), on large-scale
                 dynamically evolving social networks. We assume that
                 the graph of friendships is stored in distributed
                 shared memory, as is the case for large social networks
                 such as Twitter.\par

                 For global PageRank, we assume that the social network
                 has $n$ nodes, and $m$ adversarially chosen edges
                 arrive in a random order. We show that with a reset
                 probability of $ \epsilon $, the expected total work
                 needed to maintain an accurate estimate (using the
                 Monte Carlo method) of the PageRank of every node at
                 all times is $ O(n \ln m / \epsilon^2)$. This is
                 significantly better than all known bounds for
                 incremental PageRank. For instance, if we naively
                 recompute the PageRanks as each edge arrives, the
                 simple power iteration method needs $ \Omega (m^2 / \ln
                 (1 / (1 - \epsilon)))$ total time and the Monte Carlo
                 method needs $ O(m n / \epsilon)$ total time; both are
                 prohibitively expensive. We also show that we can
                 handle deletions equally efficiently.\par

                 We then study the computation of the top $k$
                 personalized PageRanks starting from a seed node,
                 assuming that personalized PageRanks follow a power-law
                 with exponent $ < 1$. We show that if we store $ R > q
                 \ln n$ random walks starting from every node for large
                 enough constant $q$ (using the approach outlined for
                 global PageRank), then the expected number of calls
                 made to the distributed social network database is $
                 O(k / (R^{(1 - \alpha) / \alpha }))$. We also present
                 experimental results from the social networking site,
                 Twitter, verifying our assumptions and analyses. The
                 overall result is that this algorithm is fast enough
                 for real-time queries over a dynamic social network.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2010:QES,
  author =       "Jongwuk Lee and Seung-won Hwang",
  title =        "{QSkycube}: efficient skycube computation using
                 point-based space partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "3",
  pages =        "185--196",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:16 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2010:ZEI,
  author =       "Bin Liu and Chee-Yong Chan",
  title =        "{ZINC}: efficient indexing for skyline computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "3",
  pages =        "197--207",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:16 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rastogi:2011:LSC,
  author =       "Vibhor Rastogi and Nilesh Dalvi and Minos
                 Garofalakis",
  title =        "Large-scale collective entity matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "4",
  pages =        "208--218",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:17 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dalvi:2011:AWL,
  author =       "Nilesh Dalvi and Ravi Kumar and Mohamed Soliman",
  title =        "Automatic wrappers for large scale {Web} extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "4",
  pages =        "219--230",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:17 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2011:FSM,
  author =       "Xintian Yang and Srinivasan Parthasarathy and P.
                 Sadayappan",
  title =        "Fast sparse matrix-vector multiplication on {GPUs}:
                 implications for graph mining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "4",
  pages =        "231--242",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:17 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rao:2011:UPB,
  author =       "Jun Rao and Eugene J. Shekita and Sandeep Tata",
  title =        "Using {Paxos} to build a scalable, consistent, and
                 highly available datastore",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "4",
  pages =        "243--254",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:17 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2011:FSI,
  author =       "Bolin Ding and Arnd Christian K{\"o}nig",
  title =        "Fast set intersection in memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "4",
  pages =        "255--266",
  month =        jan,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:17 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Parameswaran:2011:HAG,
  author =       "Aditya Parameswaran and Anish Das Sarma and Hector
                 Garcia-Molina and Neoklis Polyzotis and Jennifer
                 Widom",
  title =        "Human-assisted graph search: it's okay to ask
                 questions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "5",
  pages =        "267--278",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:18 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yakout:2011:GDR,
  author =       "Mohamed Yakout and Ahmed K. Elmagarmid and Jennifer
                 Neville and Mourad Ouzzani and Ihab F. Ilyas",
  title =        "Guided data repair",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "5",
  pages =        "279--289",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:18 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Venetis:2011:HLD,
  author =       "Petros Venetis and Hector Gonzalez and Christian S.
                 Jensen and Alon Halevy",
  title =        "Hyper-local, directions-based ranking of places",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "5",
  pages =        "290--301",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:18 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koc:2011:IMC,
  author =       "M. Levent Ko{\c{c}} and Christopher R{\'e}",
  title =        "Incrementally maintaining classification using an
                 {RDBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "5",
  pages =        "302--313",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:18 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2011:HTT,
  author =       "Bingsheng He and Jeffrey Xu Yu",
  title =        "High-throughput transaction executions on graphics
                 processors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "5",
  pages =        "314--325",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:18 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2011:DIQ,
  author =       "Zhao Cao and Charles Sutton and Yanlei Diao and
                 Prashant Shenoy",
  title =        "Distributed inference and query processing for {RFID}
                 tracking and monitoring",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "5",
  pages =        "326--337",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:55:18 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2011:SJS,
  author =       "Hongrae Lee and Raymond T. Ng and Kyuseok Shim",
  title =        "Similarity join size estimation using locality
                 sensitive hashing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "6",
  pages =        "338--349",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:45:07 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2011:QEB,
  author =       "Ziyang Liu and Sivaramakrishnan Natarajan and Yi
                 Chen",
  title =        "Query expansion based on clustered results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "6",
  pages =        "350--361",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:45:07 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dash:2011:CSP,
  author =       "Debabrata Dash and Neoklis Polyzotis and Anastasia
                 Ailamaki",
  title =        "{CoPhy}: a scalable, portable, and interactive index
                 advisor for large workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "6",
  pages =        "362--372",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:45:07 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Niu:2011:TSS,
  author =       "Feng Niu and Christopher R{\'e} and AnHai Doan and
                 Jude Shavlik",
  title =        "{Tuffy}: scaling up statistical inference in {Markov}
                 logic networks using an {RDBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "6",
  pages =        "373--384",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:45:07 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jahani:2011:AOM,
  author =       "Eaman Jahani and Michael J. Cafarella and Christopher
                 R{\'e}",
  title =        "Automatic optimization for {MapReduce} programs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "6",
  pages =        "385--396",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:45:07 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2011:STG,
  author =       "De-Nian Yang and Yi-Ling Chen and Wang-Chien Lee and
                 Ming-Syan Chen",
  title =        "On social-temporal group query with acquaintance
                 constraint",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "6",
  pages =        "397--408",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 13 14:45:07 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nguyen:2011:SPO,
  author =       "Hoa Nguyen and Ariel Fuxman and Stelios Paparizos and
                 Juliana Freire and Rakesh Agrawal",
  title =        "Synthesizing products for online catalogs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "7",
  pages =        "409--418",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Jun 7 19:31:12 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Floratou:2011:COS,
  author =       "Avrilia Floratou and Jignesh M. Patel and Eugene J.
                 Shekita and Sandeep Tata",
  title =        "Column-oriented storage techniques for {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "7",
  pages =        "419--429",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Jun 7 19:31:12 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lomet:2011:IPC,
  author =       "David Lomet and Kostas Tzoumas and Michael Zwilling",
  title =        "Implementing performance competitive logical
                 recovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "7",
  pages =        "430--439",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Jun 7 19:31:12 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Machanavajjhala:2011:PSR,
  author =       "Ashwin Machanavajjhala and Aleksandra Korolova and
                 Atish Das Sarma",
  title =        "Personalized social recommendations: accurate or
                 private?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "7",
  pages =        "440--450",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Jun 7 19:31:12 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Capannini:2011:EDW,
  author =       "Gabriele Capannini and Franco Maria Nardini and
                 Raffaele Perego and Fabrizio Silvestri",
  title =        "Efficient diversification of {Web} search results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "7",
  pages =        "451--459",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Jun 7 19:31:12 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{DeFrancisciMorales:2011:SCM,
  author =       "Gianmarco {De Francisci Morales} and Aristides Gionis
                 and Mauro Sozio",
  title =        "Social content matching in {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "7",
  pages =        "460--469",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Jun 7 19:31:12 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ao:2011:EPL,
  author =       "Naiyong Ao and Fan Zhang and Di Wu and Douglas S.
                 Stones and Gang Wang and Xiaoguang Liu and Jing Liu and
                 Sheng Lin",
  title =        "Efficient parallel lists intersection and index
                 compression algorithms using graphics processing
                 units",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "8",
  pages =        "470--481",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:33 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zou:2011:GAS,
  author =       "Lei Zou and Jinghui Mo and Lei Chen and M. Tamer
                 {\"O}zsu and Dongyan Zhao",
  title =        "{gStore}: answering {SPARQL} queries via subgraph
                 matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "8",
  pages =        "482--493",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:33 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Das:2011:ALE,
  author =       "Sudipto Das and Shoji Nishimura and Divyakant Agrawal
                 and Amr {El Abbadi}",
  title =        "{Albatross}: lightweight elasticity in shared storage
                 databases for the cloud using live data migration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "8",
  pages =        "494--505",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:33 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nutanong:2011:IHD,
  author =       "Sarana Nutanong and Edwin H. Jacox and Hanan Samet",
  title =        "An incremental {Hausdorff} distance calculation
                 algorithm",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "8",
  pages =        "506--517",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:33 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Blaustein:2011:SPP,
  author =       "Barbara Blaustein and Adriane Chapman and Len Seligman
                 and M. David Allen and Arnon Rosenthal",
  title =        "Surrogate parenthood: protected and informative
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "8",
  pages =        "518--525",
  month =        may,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:33 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Venetis:2011:RST,
  author =       "Petros Venetis and Alon Halevy and Jayant Madhavan and
                 Marius Pasca and Warren Shen and Fei Wu and Gengxin
                 Miao and Chung Wu",
  title =        "Recovering semantics of tables on the {Web}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "9",
  pages =        "528--538",
  month =        jun,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Neumann:2011:ECE,
  author =       "Thomas Neumann",
  title =        "Efficiently compiling efficient query plans for modern
                 hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "9",
  pages =        "539--550",
  month =        jun,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jin:2011:DCR,
  author =       "Ruoming Jin and Lin Liu and Bolin Ding and Haixun
                 Wang",
  title =        "Distance-constraint reachability computation in
                 uncertain graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "9",
  pages =        "551--562",
  month =        jun,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chi:2011:IIC,
  author =       "Yun Chi and Hyun Jin Moon and Hakan
                 Hacig{\"u}m{\"u}s",
  title =        "{iCBS}: incremental cost-based scheduling under
                 piecewise linear {SLAs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "9",
  pages =        "563--574",
  month =        jun,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eltabakh:2011:CFD,
  author =       "Mohamed Y. Eltabakh and Yuanyuan Tian and Fatma
                 {\"O}zcan and Rainer Gemulla and Aljoscha Krettek and
                 John McPherson",
  title =        "{CoHadoop}: flexible data placement and its
                 exploitation in {Hadoop}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "9",
  pages =        "575--585",
  month =        jun,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Idreos:2011:MWC,
  author =       "Stratos Idreos and Stefan Manegold and Harumi Kuno and
                 Goetz Graefe",
  title =        "Merging what's cracked, cracking what's merged:
                 adaptive indexing in main-memory column-stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "9",
  pages =        "586--597",
  month =        jun,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2011:PTR,
  author =       "Chonghai Wang and Li Yan Yuan and Jia-Huai You and
                 Osmar R. Za{\"\i}ane and Jian Pei",
  title =        "On pruning for top-$k$ ranking in uncertain
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "598--609",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pandis:2011:PPL,
  author =       "Ippokratis Pandis and Pinar T{\"o}z{\"u}n and Ryan
                 Johnson and Anastasia Ailamaki",
  title =        "{PLP}: page latch-free shared-everything {OLTP}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "610--621",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2011:EMH,
  author =       "Jiannan Wang and Guoliang Li and Jeffrey Xu Yu and
                 Jianhua Feng",
  title =        "Entity matching: how similar is similar",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "622--633",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2011:ACE,
  author =       "Di Wang and Elke A. Rundensteiner and Richard T.
                 {Ellison III}",
  title =        "Active complex event processing over event streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "634--645",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Budak:2011:STA,
  author =       "Ceren Budak and Divyakant Agrawal and Amr {El
                 Abbadi}",
  title =        "Structural trend analysis for online social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "646--656",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kimura:2011:CAP,
  author =       "Hideaki Kimura and Vivek Narasayya and Manoj Syamala",
  title =        "Compression aware physical database design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "657--668",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bernecker:2011:EPR,
  author =       "Thomas Bernecker and Tobias Emrich and Hans-Peter
                 Kriegel and Matthias Renz and Stefan Zankl and Andreas
                 Z{\"u}fle",
  title =        "Efficient probabilistic reverse nearest neighbor query
                 processing on uncertain data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "669--680",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kargar:2011:KSG,
  author =       "Mehdi Kargar and Aijun An",
  title =        "Keyword search in graphs: finding $r$-cliques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "4",
  number =       "10",
  pages =        "681--692",
  month =        jul,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 5 17:23:34 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fabbri:2011:EBA,
  author =       "Daniel Fabbri and Kristen LeFevre",
  title =        "Explanation-based auditing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "1",
  pages =        "1--12",
  month =        sep,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:06 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To comply with emerging privacy laws and regulations,
                 it has become common for applications like electronic
                 health records systems (EHRs) to collect access logs,
                 which record each time a user (e.g., a hospital
                 employee) accesses a piece of sensitive data (e.g., a
                 patient record). Using the access log, it is easy to
                 answer simple queries (e.g., Who accessed Alice's
                 medical record?), but this often does not provide
                 enough information. In addition to learning who
                 accessed their medical records, patients will likely
                 want to understand why each access occurred. In this
                 paper, we introduce the problem of generating
                 explanations for individual records in an access log.
                 The problem is motivated by user-centric auditing
                 applications, and it also provides a novel approach to
                 misuse detection. We develop a framework for modeling
                 explanations which is based on a fundamental
                 observation: For certain classes of databases,
                 including EHRs, the reason for most data accesses can
                 be inferred from data stored elsewhere in the database.
                 For example, if Alice has an appointment with Dr. Dave,
                 this information is stored in the database, and it
                 explains why Dr. Dave looked at Alice's record. Large
                 numbers of data accesses can be explained using general
                 forms called explanation templates. Rather than
                 requiring an administrator to manually specify
                 explanation templates, we propose a set of algorithms
                 for automatically discovering frequent templates from
                 the database (i.e., those that explain a large number
                 of accesses). We also propose techniques for inferring
                 collaborative user groups, which can be used to enhance
                 the quality of the discovered explanations. Finally, we
                 have evaluated our proposed techniques using an access
                 log and data from the University of Michigan Health
                 System. Our results demonstrate that in practice we can
                 provide explanations for over 94\% of data accesses in
                 the log.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Marcus:2011:HPS,
  author =       "Adam Marcus and Eugene Wu and David Karger and Samuel
                 Madden and Robert Miller",
  title =        "Human-powered sorts and joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "1",
  pages =        "13--24",
  month =        sep,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:06 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Crowdsourcing markets like Amazon's Mechanical Turk
                 (MTurk) make it possible to task people with small
                 jobs, such as labeling images or looking up phone
                 numbers, via a programmatic interface. MTurk tasks for
                 processing datasets with humans are currently designed
                 with significant reimplementation of common workflows
                 and ad-hoc selection of parameters such as price to pay
                 per task. We describe how we have integrated crowds
                 into a declarative workflow engine called Qurk to
                 reduce the burden on workflow designers. In this paper,
                 we focus on how to use humans to compare items for
                 sorting and joining data, two of the most common
                 operations in DBMSs. We describe our basic query
                 interface and the user interface of the tasks we post
                 to MTurk. We also propose a number of optimizations,
                 including task batching, replacing pairwise comparisons
                 with numerical ratings, and pre-filtering tables before
                 joining them, which dramatically reduce the overall
                 cost of running sorts and joins on the crowd. In an
                 experiment joining two sets of images, we reduce the
                 overall cost from \$67 in a naive implementation to
                 about \$3, without substantially affecting accuracy or
                 latency. In an end-to-end experiment, we reduced cost
                 by a factor of 14.5.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cormode:2011:VCS,
  author =       "Graham Cormode and Justin Thaler and Ke Yi",
  title =        "Verifying computations with streaming interactive
                 proofs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "1",
  pages =        "25--36",
  month =        sep,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:06 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "When computation is outsourced, the data owner would
                 like to be assured that the desired computation has
                 been performed correctly by the service provider. In
                 theory, proof systems can give the necessary assurance,
                 but prior work is not sufficiently scalable or
                 practical. In this paper, we develop new proof
                 protocols for verifying computations which are
                 streaming in nature: the verifier (data owner) needs
                 only logarithmic space and a single pass over the
                 input, and after observing the input follows a simple
                 protocol with a prover (service provider) that takes
                 logarithmic communication spread over a logarithmic
                 number of rounds. These ensure that the computation is
                 performed correctly: that the service provider has not
                 made any errors or missed out some data. The guarantee
                 is very strong: even if the service provider
                 deliberately tries to cheat, there is only vanishingly
                 small probability of doing so undetected, while a
                 correct computation is always accepted. We first
                 observe that some theoretical results can be modified
                 to work with streaming verifiers, showing that there
                 are efficient protocols for problems in the complexity
                 classes NP and NC. Our main results then seek to bridge
                 the gap between theory and practice by developing
                 usable protocols for a variety of problems of central
                 importance in streaming and database processing. All
                 these problems require linear space in the traditional
                 streaming model, and therefore our protocols
                 demonstrate that adding a prover can exponentially
                 reduce the effort needed by the verifier. Our
                 experimental results show that our protocols are
                 practical and scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lin:2011:MOI,
  author =       "Dan Lin and Christian S. Jensen and Rui Zhang and Lu
                 Xiao and Jiaheng Lu",
  title =        "A moving-object index for efficient query processing
                 with peer-wise location privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "1",
  pages =        "37--48",
  month =        sep,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:06 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the growing use of location-based services,
                 location privacy attracts increasing attention from
                 users, industry, and the research community. While
                 considerable effort has been devoted to inventing
                 techniques that prevent service providers from knowing
                 a user's exact location, relatively little attention
                 has been paid to enabling so-called peer-wise
                 privacy---the protection of a user's location from
                 unauthorized peer users. This paper identifies an
                 important efficiency problem in existing peer-privacy
                 approaches that simply apply a filtering step to
                 identify users that are located in a query range, but
                 that do not want to disclose their location to the
                 querying peer. To solve this problem, we propose a
                 novel, privacy-policy enabled index called the PEB-tree
                 that seamlessly integrates location proximity and
                 policy compatibility. We propose efficient algorithms
                 that use the PEB-tree for processing privacy-aware
                  range and $k$NN queries. Extensive experiments suggest
                 that the PEB-tree enables efficient query processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mansour:2011:EES,
  author =       "Essam Mansour and Amin Allam and Spiros Skiadopoulos
                 and Panos Kalnis",
  title =        "{ERA}: efficient serial and parallel suffix tree
                 construction for very long strings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "1",
  pages =        "49--60",
  month =        sep,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:06 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The suffix tree is a data structure for indexing
                 strings. It is used in a variety of applications such
                 as bioinformatics, time series analysis, clustering,
                 text editing and data compression. However, when the
                 string and the resulting suffix tree are too large to
                 fit into the main memory, most existing construction
                 algorithms become very inefficient. This paper presents
                 a disk-based suffix tree construction method, called
                 Elastic Range (ERa), which works efficiently with very
                 long strings that are much larger than the available
                 memory. ERa partitions the tree construction process
                 horizontally and vertically and minimizes I/Os by
                 dynamically adjusting the horizontal partitions
                 independently for each vertical partition, based on the
                 evolving shape of the tree and the available memory.
                 Where appropriate, ERa also groups vertical partitions
                 together to amortize the I/O cost. We developed a
                 serial version; a parallel version for shared-memory
                 and shared-disk multi-core systems; and a parallel
                 version for shared-nothing architectures. ERa indexes
                 the entire human genome in 19 minutes on an ordinary
                 desktop computer. For comparison, the fastest existing
                 method needs 15 minutes using 1024 CPUs on an IBM
                 BlueGene supercomputer.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Krueger:2011:FUR,
  author =       "Jens Krueger and Changkyu Kim and Martin Grund and
                 Nadathur Satish and David Schwalb and Jatin Chhugani
                 and Hasso Plattner and Pradeep Dubey and Alexander
                 Zeier",
  title =        "Fast updates on read-optimized databases using
                 multi-core {CPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "1",
  pages =        "61--72",
  month =        sep,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:06 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Read-optimized columnar databases use differential
                 updates to handle writes by maintaining a separate
                 write-optimized delta partition which is periodically
                 merged with the read-optimized and compressed main
                 partition. This merge process introduces significant
                 overheads and unacceptable downtimes in update
                 intensive systems, aspiring to combine transactional
                 and analytical workloads into one system. In the first
                 part of the paper, we report data analyses of 12 SAP
                 Business Suite customer systems. In the second half, we
                 present an optimized merge process reducing the merge
                 overhead of current systems by a factor of 30. Our
                 linear-time merge algorithm exploits the underlying
                 high compute and bandwidth resources of modern
                 multi-core CPUs with architecture-aware optimizations
                 and efficient parallelization. This enables compressed
                 in-memory column stores to handle the transactional
                 update rate required by enterprise applications, while
                 keeping properties of read-optimized databases for
                 analytic-style queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Goyal:2011:DBA,
  author =       "Amit Goyal and Francesco Bonchi and Laks V. S.
                 Lakshmanan",
  title =        "A data-based approach to social influence
                 maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "1",
  pages =        "73--84",
  month =        sep,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:06 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Influence maximization is the problem of finding a set
                 of users in a social network, such that by targeting
                 this set, one maximizes the expected spread of
                 influence in the network. Most of the literature on
                 this topic has focused exclusively on the social graph,
                 overlooking historical data, i.e., traces of past
                 action propagations. In this paper, we study influence
                 maximization from a novel data-based perspective. In
                 particular, we introduce a new model, which we call
                 credit distribution, that directly leverages available
                 propagation traces to learn how influence flows in the
                 network and uses this to estimate expected influence
                 spread. Our approach also learns the different levels
                 of influence-ability of users, and it is time-aware in
                 the sense that it takes the temporal nature of
                 influence into account. We show that influence
                  maximization under the credit distribution model is
                  NP-hard and that the function that defines expected
                 spread under our model is submodular. Based on these,
                 we develop an approximation algorithm for solving the
                 influence maximization problem that at once enjoys high
                 accuracy compared to the standard approach, while being
                 several orders of magnitude faster and more scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pavlo:2011:PMO,
  author =       "Andrew Pavlo and Evan P. C. Jones and Stanley Zdonik",
  title =        "On predictive modeling for optimizing transaction
                 execution in parallel {OLTP} systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "2",
  pages =        "85--96",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:08 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A new emerging class of parallel database management
                 systems (DBMS) is designed to take advantage of the
                 partitionable workloads of on-line transaction
                 processing (OLTP) applications [23, 20]. Transactions
                 in these systems are optimized to execute to completion
                 on a single node in a shared-nothing cluster without
                 needing to coordinate with other nodes or use expensive
                 concurrency control measures [18]. But some OLTP
                 applications cannot be partitioned such that all of
                 their transactions execute within a single-partition in
                 this manner. These distributed transactions access data
                 not stored within their local partitions and
                 subsequently require more heavy-weight concurrency
                 control protocols. Further difficulties arise when the
                 transaction's execution properties, such as the number
                 of partitions it may need to access or whether it will
                 abort, are not known beforehand. The DBMS could
                 mitigate these performance issues if it is provided
                 with additional information about transactions. Thus,
                 in this paper we present a Markov model-based approach
                 for automatically selecting which optimizations a DBMS
                 could use, namely (1) more efficient concurrency
                 control schemes, (2) intelligent scheduling, (3)
                 reduced undo logging, and (4) speculative execution. To
                 evaluate our techniques, we implemented our models and
                 integrated them into a parallel, main-memory OLTP DBMS
                 to show that we can improve the performance of
                 applications with diverse workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Goasdoue:2011:VSS,
  author =       "Fran{\c{c}}ois Goasdou{\'e} and Konstantinos Karanasos
                 and Julien Leblay and Ioana Manolescu",
  title =        "View selection in {Semantic Web} databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "2",
  pages =        "97--108",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:08 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We consider the setting of a Semantic Web database,
                 containing both explicit data encoded in RDF triples,
                 and implicit data, implied by the RDF semantics. Based
                 on a query workload, we address the problem of
                 selecting a set of views to be materialized in the
                 database, minimizing a combination of query processing,
                 view storage, and view maintenance costs. Starting from
                 an existing relational view selection method, we devise
                 new algorithms for recommending view sets, and show
                 that they scale significantly beyond the existing
                 relational ones when adapted to the RDF context. To
                 account for implicit triples in query answers, we
                 propose a novel RDF query reformulation algorithm and
                 an innovative way of incorporating it into view
                 selection in order to avoid a combinatorial explosion
                 in the complexity of the selection process. The
                 interest of our techniques is demonstrated through a
                 set of experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jestes:2011:BWH,
  author =       "Jeffrey Jestes and Ke Yi and Feifei Li",
  title =        "Building wavelet histograms on large data in
                 {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "2",
  pages =        "109--120",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:08 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MapReduce is becoming the de facto framework for
                 storing and processing massive data, due to its
                 excellent scalability, reliability, and elasticity. In
                 many MapReduce applications, obtaining a compact
                 accurate summary of data is essential. Among various
                 data summarization tools, histograms have proven to be
                 particularly important and useful for summarizing data,
                 and the wavelet histogram is one of the most widely
                 used histograms. In this paper, we investigate the
                 problem of building wavelet histograms efficiently on
                 large datasets in MapReduce. We measure the efficiency
                 of the algorithms by both end-to-end running time and
                 communication cost. We demonstrate straightforward
                 adaptations of existing exact and approximate methods
                 for building wavelet histograms to MapReduce clusters
                 are highly inefficient. To that end, we design new
                 algorithms for computing exact and approximate wavelet
                 histograms and discuss their implementation in
                 MapReduce. We illustrate our techniques in Hadoop, and
                 compare to baseline solutions with extensive
                 experiments performed in a heterogeneous Hadoop cluster
                 of 16 nodes, using large real and synthetic datasets,
                 up to hundreds of gigabytes. The results suggest
                 significant (often orders of magnitude) performance
                 improvement achieved by our new algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2011:SMD,
  author =       "Di Yang and Elke A. Rundensteiner and Matthew O.
                 Ward",
  title =        "Summarization and matching of density-based clusters
                 in streaming environments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "2",
  pages =        "121--132",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:08 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Density-based cluster mining is known to serve a broad
                 range of applications ranging from stock trade analysis
                 to moving object monitoring. Although methods for
                 efficient extraction of density-based clusters have
                 been studied in the literature, the problem of
                 summarizing and matching of such clusters with
                 arbitrary shapes and complex cluster structures remains
                 unsolved. Therefore, the goal of our work is to extend
                 the state-of-art of density-based cluster mining in
                 streams from cluster extraction only to now also
                 support analysis and management of the extracted
                 clusters. Our work solves three major technical
                 challenges. First, we propose a novel multi-resolution
                 cluster summarization method, called Skeletal Grid
                 Summarization (SGS), which captures the key features of
                 density-based clusters, covering both their external
                 shape and internal cluster structures. Second, in order
                 to summarize the extracted clusters in real-time, we
                 present an integrated computation strategy C-SGS, which
                 piggybacks the generation of cluster summarizations
                 within the online clustering process. Lastly, we design
                 a mechanism to efficiently execute cluster matching
                 queries, which identify similar clusters for given
                 cluster of analyst's interest from clusters extracted
                 earlier in the stream history. Our experimental study
                 using real streaming data shows the clear superiority
                 of our proposed methods in both efficiency and
                 effectiveness for cluster summarization and cluster
                 matching queries to other potential alternatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nguyen:2011:MSM,
  author =       "Thanh Nguyen and Viviane Moreira and Huong Nguyen and
                 Hoa Nguyen and Juliana Freire",
  title =        "Multilingual schema matching for {Wikipedia}
                 infoboxes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "2",
  pages =        "133--144",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:08 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent research has taken advantage of Wikipedia's
                 multi-lingualism as a resource for cross-language
                 information retrieval and machine translation, as well
                 as proposed techniques for enriching its cross-language
                 structure. The availability of documents in multiple
                 languages also opens up new opportunities for querying
                 structured Wikipedia content, and in particular, to
                 enable answers that straddle different languages. As a
                 step towards supporting such queries, in this paper, we
                 propose a method for identifying mappings between
                 attributes from infoboxes that come from pages in
                 different languages. Our approach finds mappings in a
                 completely automated fashion. Because it does not
                 require training data, it is scalable: not only can it
                 be used to find mappings between many language pairs,
                 but it is also effective for languages that are
                 under-represented and lack sufficient training samples.
                 Another important benefit of our approach is that it
                 does not depend on syntactic similarity between
                 attribute names, and thus, it can be applied to
                 language pairs that have distinct morphologies. We have
                 performed an extensive experimental evaluation using a
                 corpus consisting of pages in Portuguese, Vietnamese,
                 and English. The results show that not only does our
                 approach obtain high precision and recall, but it also
                 outperforms state-of-the-art techniques. We also
                 present a case study which demonstrates that the
                 multilingual mappings we derive lead to substantial
                 improvements in answer quality and coverage for
                 structured queries over Wikipedia content.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2011:CFP,
  author =       "Guimei Liu and Haojun Zhang and Limsoon Wong",
  title =        "Controlling false positives in association rule
                 mining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "2",
  pages =        "145--156",
  month =        oct,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:08 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Association rule mining is an important problem in the
                 data mining area. It enumerates and tests a large
                 number of rules on a dataset and outputs rules that
                 satisfy user-specified constraints. Due to the large
                 number of rules being tested, rules that do not
                 represent real systematic effect in the data can
                 satisfy the given constraints purely by random chance.
                 Hence association rule mining often suffers from a high
                 risk of false positive errors. There is a lack of
                 comprehensive study on controlling false positives in
                 association rule mining. In this paper, we adopt three
                 multiple testing correction approaches---the direct
                 adjustment approach, the permutation-based approach and
                 the holdout approach---to control false positives in
                 association rule mining, and conduct extensive
                 experiments to study their performance. Our results
                 show that (1) Numerous spurious rules are generated if
                 no correction is made. (2) The three approaches can
                 control false positives effectively. Among the three
                 approaches, the permutation-based approach has the
                 highest power of detecting real association rules, but
                 it is very computationally expensive. We employ several
                 techniques to reduce its cost effectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Suchanek:2011:PPA,
  author =       "Fabian M. Suchanek and Serge Abiteboul and Pierre
                 Senellart",
  title =        "{PARIS}: probabilistic alignment of relations,
                 instances, and schema",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "157--168",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "One of the main challenges that the Semantic Web faces
                 is the integration of a growing number of independently
                  designed ontologies. In this work, we present PARIS, an
                  approach for the automatic alignment of ontologies.
                  PARIS aligns not only instances, but also relations and
                 classes. Alignments at the instance level
                 cross-fertilize with alignments at the schema level.
                 Thereby, our system provides a truly holistic solution
                 to the problem of ontology alignment. The heart of the
                 approach is probabilistic, i.e., we measure degrees of
                 matchings based on probability estimates. This allows
                  PARIS to run without any parameter tuning. We
                 demonstrate the efficiency of the algorithm and its
                 precision through extensive experiments. In particular,
                 we obtain a precision of around 90\% in experiments
                 with some of the world's largest ontologies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ranu:2011:ATQ,
  author =       "Sayan Ranu and Ambuj K. Singh",
  title =        "Answering top-$k$ queries over a mixture of attractive
                 and repulsive dimensions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "169--180",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we formulate a top-$k$ query that
                 compares objects in a database to a user-provided query
                 object on a novel scoring function. The proposed
                 scoring function combines the idea of attractive and
                 repulsive dimensions into a general framework to
                 overcome the weakness of traditional distance or
                 similarity measures. We study the properties of the
                 proposed class of scoring functions and develop
                 efficient and scalable index structures that index the
                 isolines of the function. We demonstrate various
                 scenarios where the query finds application. Empirical
                 evaluation demonstrates a performance gain of one to
                 two orders of magnitude on querying time over existing
                 state-of-the-art top-$k$ techniques. Further, a
                 qualitative analysis is performed on a real dataset to
                 highlight the potential of the proposed query in
                 discovering hidden data characteristics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Armbrust:2011:PST,
  author =       "Michael Armbrust and Kristal Curtis and Tim Kraska and
                 Armando Fox and Michael J. Franklin and David A.
                 Patterson",
  title =        "{PIQL}: success-tolerant query processing in the
                 cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "181--192",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Newly-released web applications often succumb to a
                 ``Success Disaster,'' where overloaded database
                 machines and resulting high response times destroy a
                 previously good user experience. Unfortunately, the
                 data independence provided by a traditional relational
                 database system, while useful for agile development,
                 only exacerbates the problem by hiding potentially
                 expensive queries under simple declarative expressions.
                 As a result, developers of these applications are
                 increasingly abandoning relational databases in favor
                 of imperative code written against distributed
                 key/value stores, losing the many benefits of data
                 independence in the process. Instead, we propose PIQL,
                 a declarative language that also provides scale
                 independence by calculating an upper bound on the
                 number of key/value store operations that will be
                 performed for any query. Coupled with a service level
                 objective (SLO) compliance prediction model and PIQL's
                 scalable database architecture, these bounds make it
                 easy for developers to write success-tolerant
                 applications that support an arbitrarily large number
                 of users while still providing acceptable performance.
                 In this paper, we present the PIQL query processing
                 system and evaluate its scale independence on hundreds
                 of machines using two benchmarks, TPC-W and SCADr.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhao:2011:GQE,
  author =       "Peixiang Zhao and Charu C. Aggarwal and Min Wang",
  title =        "{gSketch}: on query estimation in graph streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "193--204",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many dynamic applications are built upon large network
                 infrastructures, such as social networks, communication
                 networks, biological networks and the Web. Such
                 applications create data that can be naturally modeled
                 as graph streams, in which edges of the underlying
                 graph are received and updated sequentially in a form
                 of a stream. It is often necessary and important to
                 summarize the behavior of graph streams in order to
                 enable effective query processing. However, the sheer
                 size and dynamic nature of graph streams present an
                 enormous challenge to existing graph management
                 techniques. In this paper, we propose a new graph
                 sketch method, gSketch, which combines well studied
                 synopses for traditional data streams with a sketch
                 partitioning technique, to estimate and optimize the
                 responses to basic queries on graph streams. We
                 consider two different scenarios for query estimation:
                 (1) A graph stream sample is available; (2) Both a
                 graph stream sample and a query workload sample are
                 available. Algorithms for different scenarios are
                 designed respectively by partitioning a global sketch
                 to a group of localized sketches in order to optimize
                 the query estimation accuracy. We perform extensive
                 experimental studies on both real and synthetic data
                 sets and demonstrate the power and robustness of
                 gSketch in comparison with the state-of-the-art global
                 sketch method.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ruttenberg:2011:IEM,
  author =       "Brian E. Ruttenberg and Ambuj K. Singh",
  title =        "Indexing the earth mover's distance using normal
                 distributions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "205--216",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Querying uncertain data sets (represented as
                 probability distributions) presents many challenges due
                 to the large amount of data involved and the
                 difficulties comparing uncertainty between
                 distributions. The Earth Mover's Distance (EMD) has
                 increasingly been employed to compare uncertain data
                 due to its ability to effectively capture the
                 differences between two distributions. Computing the
                 EMD entails finding a solution to the transportation
                 problem, which is computationally intensive. In this
                 paper, we propose a new lower bound to the EMD and an
                 index structure to significantly improve the
                  performance of EMD based $K$-nearest neighbor ($K$-NN)
                 queries on uncertain databases. We propose a new lower
                 bound to the EMD that approximates the EMD on a
                 projection vector. Each distribution is projected onto
                 a vector and approximated by a normal distribution, as
                 well as an accompanying error term. We then represent
                 each normal as a point in a Hough transformed space. We
                 then use the concept of stochastic dominance to
                 implement an efficient index structure in the
                 transformed space. We show that our method
                  significantly decreases $K$-NN query time on uncertain
                 databases. The index structure also scales well with
                 database cardinality. It is well suited for
                 heterogeneous data sets, helping to keep EMD based
                 queries tractable as uncertain data sets become larger
                 and more complex.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qumsiyeh:2011:GER,
  author =       "Rani Qumsiyeh and Maria S. Pera and Yiu-Kai Ng",
  title =        "Generating exact- and ranked partially-matched answers
                 to questions in advertisements",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "217--228",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Taking advantage of the Web, many advertisements (ads
                 for short) websites, which aspire to increase client's
                 transactions and thus profits, offer searching tools
                 which allow users to (i) post keyword queries to
                 capture their information needs or (ii) invoke
                 form-based interfaces to create queries by selecting
                 search options, such as a price range, filled-in
                 entries, check boxes, or drop-down menus. These search
                 mechanisms, however, are inadequate, since they cannot
                 be used to specify a natural-language query with rich
                 syntactic and semantic content, which can only be
                 handled by a question answering (QA) system.
                 Furthermore, existing ads websites are incapable of
                 evaluating arbitrary Boolean queries or retrieving
                 partially-matched answers that might be of interest to
                 the user whenever a user's search yields only a few or
                 no results at all. In solving these problems, we
                 present a QA system for ads, called CQAds, which (i)
                 allows users to post a natural-language question Q for
                 retrieving relevant ads, if they exist, (ii) identifies
                 ads as answers that partially-match the requested
                 information expressed in Q, if insufficient or no
                 answers to Q can be retrieved, which are ordered using
                 a similarity-ranking approach, and (iii) analyzes
                 incomplete or ambiguous questions to perform the ``best
                 guess'' in retrieving answers that ``best match'' the
                 selection criteria specified in Q. CQAds is also
                 equipped with a Boolean model to evaluate Boolean
                 operators that are either explicitly or implicitly
                 specified in Q, i.e., with or without Boolean operators
                 specified by the users, respectively. CQAds is easy to
                 use, scalable to all ads domains, and more powerful
                 than search tools provided by existing ads websites,
                 since its query-processing strategy retrieves relevant
                 ads of higher quality and quantity. We have verified
                 the accuracy of CQAds in retrieving ads on eight ads
                 domains and compared its ranking strategy with other
                 well-known ranking approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fakas:2011:SOS,
  author =       "Georgios J. Fakas and Zhi Cai and Nikos Mamoulis",
  title =        "Size-$l$ object summaries for relational keyword
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "229--240",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A previously proposed keyword search paradigm
                 produces, as a query result, a ranked list of Object
                 Summaries (OSs). An OS is a tree structure of related
                 tuples that summarizes all data held in a relational
                 database about a particular Data Subject (DS). However,
                 some of these OSs are very large in size and therefore
                 unfriendly to users that initially prefer synoptic
                 information before proceeding to more comprehensive
                 information about a particular DS. In this paper, we
                 investigate the effective and efficient retrieval of
                  concise and informative OSs. We argue that a good
                  size-$l$ OS should be a stand-alone and meaningful
                  synopsis of the most important information about the
                  particular DS. More precisely, we define a size-$l$ OS
                  as a partial OS composed of $l$ important tuples. We
                  propose three algorithms for the efficient generation
                  of size-$l$ OSs
                 (in addition to the optimal approach which requires
                 exponential time). Experimental evaluation on DBLP and
                 TPC-H databases verifies the effectiveness and
                 efficiency of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2011:RER,
  author =       "Lujun Fang and Anish Das Sarma and Cong Yu and Philip
                 Bohannon",
  title =        "{REX}: explaining relationships between entity pairs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "241--252",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Knowledge bases of entities and relations (either
                 constructed manually or automatically) are behind many
                 real world search engines, including those at Yahoo!,
                 Microsoft, and Google. Those knowledge bases can be
                 viewed as graphs with nodes representing entities and
                 edges representing (primary) relationships, and various
                 studies have been conducted on how to leverage them to
                 answer entity seeking queries. Meanwhile, in a
                 complementary direction, analyses over the query logs
                 have enabled researchers to identify entity pairs that
                 are statistically correlated. Such entity relationships
                 are then presented to search users through the
                 ``related searches'' feature in modern search engines.
                 However, entity relationships thus discovered can often
                 be ``puzzling'' to the users because why the entities
                 are connected is often indescribable. In this paper, we
                 propose a novel problem called entity relationship
                 explanation, which seeks to explain why a pair of
                 entities are connected, and solve this challenging
                 problem by integrating the above two complementary
                 approaches, i.e., we leverage the knowledge base to
                 ``explain'' the connections discovered between entity
                 pairs. More specifically, we present REX, a system that
                 takes a pair of entities in a given knowledge base as
                 input and efficiently identifies a ranked list of
                 relationship explanations. We formally define
                 relationship explanations and analyze their desirable
                 properties. Furthermore, we design and implement
                 algorithms to efficiently enumerate and rank all
                 relationship explanations based on multiple measures of
                 ``interestingness.'' We perform extensive experiments
                 over real web-scale data gathered from DBpedia and a
                 commercial search engine, demonstrating the efficiency
                 and scalability of REX. We also perform user studies to
                 corroborate the effectiveness of explanations generated
                 by REX.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2011:PJP,
  author =       "Guoliang Li and Dong Deng and Jiannan Wang and Jianhua
                 Feng",
  title =        "{Pass-Join}: a partition-based method for similarity
                 joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "253--264",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As an essential operation in data cleaning, the
                 similarity join has attracted considerable attention
                 from the database community. In this paper, we study
                 string similarity joins with edit-distance constraints,
                 which find similar string pairs from two large sets of
                 strings whose edit distance is within a given
                 threshold. Existing algorithms are efficient either for
                 short strings or for long strings, and there is no
                 algorithm that can efficiently and adaptively support
                 both short strings and long strings. To address this
                 problem, we propose a partition-based method called
                 Pass-Join. Pass-Join partitions a string into a set of
                 segments and creates inverted indices for the segments.
                 Then for each string, Pass-Join selects some of its
                 substrings and uses the selected substrings to find
                 candidate pairs using the inverted indices. We devise
                 efficient techniques to select the substrings and prove
                 that our method can minimize the number of selected
                 substrings. We develop novel pruning techniques to
                 efficiently verify the candidate pairs. Experimental
                 results show that our algorithms are efficient for both
                 short strings and long strings, and outperform
                 state-of-the-art methods on real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hoobin:2011:RLZ,
  author =       "Christopher Hoobin and Simon J. Puglisi and Justin
                 Zobel",
  title =        "Relative {Lempel--Ziv} factorization for efficient
                 storage and retrieval of {Web} collections",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "3",
  pages =        "265--273",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:09 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Compression techniques that support fast random access
                 are a core component of any information system. Current
                 state-of-the-art methods group documents into
                 fixed-sized blocks and compress each block with a
                 general-purpose adaptive algorithm such as gzip. Random
                 access to a specific document then requires
                 decompression of a block. The choice of block size is
                 critical: it trades between compression effectiveness
                 and document retrieval times. In this paper we present
                 a scalable compression method for large document
                 collections that allows fast random access. We build a
                 representative sample of the collection and use it as a
                  dictionary in an LZ77-like encoding of the rest of the
                 collection, relative to the dictionary. We demonstrate
                 on large collections, that using a dictionary as small
                 as 0.1\% of the collection size, our algorithm is
                 dramatically faster than previous methods, and in
                 general gives much better compression.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2011:TCE,
  author =       "Ning Zhang and Junichi Tatemura and Jignesh M. Patel
                 and Hakan Hacig{\"u}m{\"u}s",
  title =        "Towards cost-effective storage provisioning for
                 {DBMSs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "274--285",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data center operators face a bewildering set of
                 choices when considering how to provision resources on
                 machines with complex I/O subsystems. Modern I/O
                 subsystems often have a rich mix of fast, high
                 performing, but expensive SSDs sitting alongside with
                 cheaper but relatively slower (for random accesses)
                 traditional hard disk drives. The data center operators
                 need to determine how to provision the I/O resources
                 for specific workloads so as to abide by existing
                 Service Level Agreements (SLAs), while minimizing the
                 total operating cost (TOC) of running the workload,
                 where the TOC includes the amortized hardware costs and
                 the run time energy costs. The focus of this paper is
                 on introducing this new problem of TOC-based storage
                 allocation, cast in a framework that is compatible with
                 traditional DBMS query optimization and query
                 processing architecture. We also present a
                 heuristic-based solution to this problem, called DOT.
                 We have implemented DOT in PostgreSQL, and experiments
                 using TPC-H and TPC-C demonstrate significant TOC
                 reduction by DOT in various settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Roh:2011:BTI,
  author =       "Hongchan Roh and Sanghyun Park and Sungho Kim and
                 Mincheol Shin and Sang-Won Lee",
  title =        "{B+}-tree index optimization by exploiting internal
                 parallelism of flash-based solid state drives",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "286--297",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Previous research addressed the potential problems of
                 the hard-disk oriented design of DBMSs of flashSSDs. In
                 this paper, we focus on exploiting potential benefits
                 of flashSSDs. First, we examine the internal
                 parallelism issues of flashSSDs by conducting
                 benchmarks to various flashSSDs. Then, we suggest
                 algorithm-design principles in order to best benefit
                 from the internal parallelism. We present a new I/O
                 request concept, called psync I/O that can exploit the
                 internal parallelism of flashSSDs in a single process.
                 Based on these ideas, we introduce B+-tree optimization
                 methods in order to utilize internal parallelism. By
                 integrating the results of these methods, we present a
                 B+-tree variant, PIO B-tree. We confirmed that each
                 optimization method substantially enhances the index
                 performance. Consequently, PIO B-tree enhanced
                 B+-tree's insert performance by a factor of up to 16.3,
                 while improving point-search performance by a factor of
                 1.2. The range search of PIO B-tree was up to 5 times
                 faster than that of the B+-tree. Moreover, PIO B-tree
                 outperformed other flash-aware indexes in various
                 synthetic workloads. We also confirmed that PIO B-tree
                 outperforms B+-tree in index traces collected inside
                 the PostgreSQL DBMS with TPC-C benchmark.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Larson:2011:HPC,
  author =       "Per-{\AA}ke Larson and Spyros Blanas and Cristian
                 Diaconu and Craig Freedman and Jignesh M. Patel and
                 Mike Zwilling",
  title =        "High-performance concurrency control mechanisms for
                 main-memory databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "298--309",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A database system optimized for in-memory storage can
                 support much higher transaction rates than current
                 systems. However, standard concurrency control methods
                 used today do not scale to the high transaction rates
                 achievable by such systems. In this paper we introduce
                 two efficient concurrency control methods specifically
                 designed for main-memory databases. Both use
                 multiversioning to isolate read-only transactions from
                 updates but differ in how atomicity is ensured: one is
                 optimistic and one is pessimistic. To avoid expensive
                 context switching, transactions never block during
                 normal processing but they may have to wait before
                 commit to ensure correct serialization ordering. We
                 also implemented a main-memory optimized version of
                 single-version locking. Experimental results show that
                 while single-version locking works well when
                 transactions are short and contention is low,
                 performance degrades under more demanding conditions.
                 The multiversion schemes have higher overhead but are
                 much less sensitive to hotspots and the presence of
                 long-running transactions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ma:2011:CTG,
  author =       "Shuai Ma and Yang Cao and Wenfei Fan and Jinpeng Huai
                 and Tianyu Wo",
  title =        "Capturing topology in graph pattern matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "310--321",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph pattern matching is often defined in terms of
                 subgraph isomorphism, an NP-complete problem. To lower
                 its complexity, various extensions of graph simulation
                 have been considered instead. These extensions allow
                 pattern matching to be conducted in cubic-time.
                 However, they fall short of capturing the topology of
                 data graphs, i.e., graphs may have a structure
                 drastically different from pattern graphs they match,
                 and the matches found are often too large to understand
                 and analyze. To rectify these problems, this paper
                 proposes a notion of strong simulation, a revision of
                 graph simulation, for graph pattern matching. (1) We
                 identify a set of criteria for preserving the topology
                 of graphs matched. We show that strong simulation
                 preserves the topology of data graphs and finds a
                 bounded number of matches. (2) We show that strong
                 simulation retains the same complexity as earlier
                 extensions of simulation, by providing a cubic-time
                 algorithm for computing strong simulation. (3) We
                 present the locality property of strong simulation,
                 which allows us to effectively conduct pattern matching
                 on distributed graphs. (4) We experimentally verify the
                 effectiveness and efficiency of these algorithms, using
                 real-life data and synthetic data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kumar:2011:PMO,
  author =       "Arun Kumar and Christopher R{\'e}",
  title =        "Probabilistic management of {OCR} data using an
                 {RDBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "322--333",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The digitization of scanned forms and documents is
                 changing the data sources that enterprises manage. To
                 integrate these new data sources with enterprise data,
                 the current state-of-the-art approach is to convert the
                 images to ASCII text using optical character
                 recognition (OCR) software and then to store the
                 resulting ASCII text in a relational database. The OCR
                 problem is challenging, and so the output of OCR often
                 contains errors. In turn, queries on the output of OCR
                 may fail to retrieve relevant answers. State-of-the-art
                 OCR programs, e.g., the OCR powering Google Books, use
                 a probabilistic model that captures many alternatives
                 during the OCR process. Only when the results of OCR
                 are stored in the database, do these approaches discard
                 the uncertainty. In this work, we propose to retain the
                 probabilistic models produced by the OCR process in a
                 relational database management system. A key technical
                 challenge is that the probabilistic data produced by
                 OCR software is very large (a single book blows up to
                 2GB from 400kB as ASCII). As a result, a baseline
                 solution that integrates these models with an RDBMS is
                 over 1000x slower versus standard text processing for
                 single table select-project queries. However, many
                 applications may have quality-performance needs that
                 are in between these two extremes of ASCII and the
                 complete model output by the OCR software. Thus, we
                 propose a novel approximation scheme called Staccato
                 that allows a user to trade recall for query
                 performance. Additionally, we provide a formal analysis
                 of our scheme's properties, and describe how we
                 integrate our scheme with standard-RDBMS text
                 indexing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pawlik:2011:RRA,
  author =       "Mateusz Pawlik and Nikolaus Augsten",
  title =        "{RTED}: a robust algorithm for the tree
                 edit distance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "334--345",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We consider the classical tree edit distance
                 between ordered labeled trees, which is defined as
                 the minimum-cost sequence of node edit operations
                 that transform one tree into another. The
                 state-of-the-art solutions for the tree edit
                 distance are not satisfactory. The main competitors
                 in the field either have optimal worst-case
                 complexity, but the worst case happens frequently,
                 or they are very efficient for some tree shapes,
                 but degenerate for others. This leads to
                 unpredictable and often infeasible runtimes. There
                 is no obvious way to choose between the algorithms.
                 In this paper we present RTED, a robust tree edit
                 distance algorithm. The asymptotic complexity of
                 RTED is smaller or equal to the complexity of the
                 best competitors for any input instance, i.e., RTED
                 is both efficient and worst-case optimal. We
                 introduce the class of LRH (Left-Right-Heavy)
                 algorithms, which includes RTED and the fastest
                 tree edit distance algorithms presented in
                 literature. We prove that RTED outperforms all
                 previously proposed LRH algorithms in terms of
                 runtime complexity. In our experiments on synthetic
                 and real world data we empirically evaluate our
                 solution and compare it to the state-of-the-art.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amsterdamer:2011:PLP,
  author =       "Yael Amsterdamer and Susan B. Davidson and
                 Daniel Deutch and Tova Milo and Julia
                 Stoyanovich and Val Tannen",
  title =        "Putting lipstick on pig: enabling
                 database-style workflow provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "346--357",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Workflow provenance typically assumes that each
                 module is a ``black-box'', so that each output
                 depends on all inputs (coarse-grained dependencies).
                 Furthermore, it does not model the internal state of
                 a module, which can change between repeated
                 executions. In practice, however, an output may
                 depend on only a small subset of the inputs
                 (fine-grained dependencies) as well as on the
                 internal state of the module. We present a novel
                 provenance framework that marries database-style and
                 workflow-style provenance, by using Pig Latin to
                 expose the functionality of modules, thus capturing
                 internal state and fine-grained dependencies. A
                 critical ingredient in our solution is the use of a
                 novel form of provenance graph that models module
                 invocations and yields a compact representation of
                 fine-grained workflow provenance. It also enables a
                 number of novel graph transformation operations,
                 allowing to choose the desired level of granularity
                 in provenance querying (ZoomIn and ZoomOut), and
                 supporting ``what-if'' workflow analytic queries. We
                 implemented our approach in the Lipstick system and
                 developed a benchmark in support of a systematic
                 performance evaluation. Our results demonstrate the
                 feasibility of tracking and querying fine-grained
                 workflow provenance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gao:2011:RAS,
  author =       "Jun Gao and Ruoming Jin and Jiashuai Zhou and
                 Jeffrey Xu Yu and Xiao Jiang and Tengjiao Wang",
  title =        "Relational approach for shortest path
                 discovery over large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "358--369",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the rapid growth of large graphs, we cannot
                 assume that graphs can still be fully loaded into
                 memory, thus the disk-based graph operation is
                 inevitable. In this paper, we take the shortest
                 path discovery as an example to investigate the
                 technique issues when leveraging existing
                 infrastructure of relational database (RDB) in the
                 graph data management. Based on the observation
                 that a variety of graph search queries can be
                 implemented by iterative operations including
                 selecting frontier nodes from visited nodes, making
                 expansion from the selected frontier nodes, and
                 merging the expanded nodes into the visited ones,
                 we introduce a relational FEM framework with three
                 corresponding operators to implement graph search
                 tasks in the RDB context. We show new features such
                 as window function and merge statement introduced
                 by recent SQL standards can not only simplify the
                 expression but also improve the performance of the
                 FEM framework. In addition, we propose two
                 optimization strategies specific to shortest path
                 discovery inside the FEM framework. First, we take
                 a bi-directional set Dijkstra's algorithm in the
                 path finding. The bi-directional strategy can
                 reduce the search space, and set Dijkstra's
                 algorithm finds the shortest path in a
                 set-at-a-time fashion. Second, we introduce an
                 index named SegTable to preserve the local shortest
                 segments, and exploit SegTable to further improve
                 the performance. The final extensive experimental
                 results illustrate our relational approach with the
                 optimization strategies achieves high scalability
                 and performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Barsky:2011:MFC,
  author =       "Marina Barsky and Sangkyum Kim and Tim
                 Weninger and Jiawei Han",
  title =        "Mining flipping correlations from large
                 datasets with taxonomies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "370--381",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper we introduce a new type of pattern
                 --- a flipping correlation pattern. The flipping
                 patterns are obtained from contrasting the
                 correlations between items at different levels of
                 abstraction. They represent surprising
                 correlations, both positive and negative, which are
                 specific for a given abstraction level, and which
                 ``flip'' from positive to negative and vice versa
                 when items are generalized to a higher level of
                 abstraction. We design an efficient algorithm for
                 finding flipping correlations, the Flipper
                 algorithm, which outperforms na{\"\i}ve pattern
                 mining methods by several orders of magnitude. We
                 apply Flipper to real-life datasets and show that
                 the discovered patterns are non-redundant,
                 surprising and actionable. Flipper finds strong
                 contrasting correlations in itemsets with
                 low-to-medium support, while existing techniques
                 cannot handle the pattern discovery in this
                 frequency range.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Konig:2011:SAT,
  author =       "Arnd Christian K{\"o}nig and Bolin Ding and
                 Surajit Chaudhuri and Vivek Narasayya",
  title =        "A statistical approach towards robust
                 progress estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "4",
  pages =        "382--393",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:11 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The need for accurate SQL progress estimation in
                 the context of decision support administration has
                 led to a number of techniques proposed for this
                 task. Unfortunately, no single one of these
                 progress estimators behaves robustly across the
                 variety of SQL queries encountered in practice,
                 meaning that each technique performs poorly for a
                 significant fraction of queries. This paper
                 proposes a novel estimator selection framework that
                 uses a statistical model to characterize the sets
                 of conditions under which certain estimators
                 outperform others, leading to a significant
                 increase in estimation robustness. The generality
                 of this framework also enables us to add a number
                 of novel ``special purpose'' estimators which
                 increase accuracy further. Most importantly, the
                 resulting model generalizes well to queries very
                 different from the ones used to train it. We
                 validate our findings using a large number of
                 industrial real-life and benchmark workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2012:RSA,
  author =       "Yizhou Sun and Charu C. Aggarwal and Jiawei Han",
  title =        "Relation strength-aware clustering of
                 heterogeneous information networks with
                 incomplete attributes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "394--405",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the rapid development of online social media,
                 online shopping sites and cyber-physical systems,
                 heterogeneous information networks have become
                 increasingly popular and content-rich over time.
                 In many cases, such networks contain multiple
                 types of objects and links, as well as different
                 kinds of attributes. The clustering of these
                 objects can provide useful insights in many
                 applications. However, the clustering of such
                 networks can be challenging since (a) the
                 attribute values of objects are often incomplete,
                 which implies that an object may carry only
                 partial attributes or even no attributes to
                 correctly label itself; and (b) the links of
                 different types may carry different kinds of
                 semantic meanings, and it is a difficult task to
                 determine the nature of their relative importance
                 in helping the clustering for a given purpose. In
                 this paper, we address these challenges by
                 proposing a model-based clustering algorithm. We
                 design a probabilistic model which clusters the
                 objects of different types into a common hidden
                 space, by using a user-specified set of
                 attributes, as well as the links from different
                 relations. The strengths of different types of
                 links are automatically learned, and are
                 determined by the given purpose of clustering. An
                 iterative algorithm is designed for solving the
                 clustering problem, in which the strengths of
                 different types of links and the quality of
                 clustering results mutually enhance each other.
                 Our experimental results on real and synthetic
                 data sets demonstrate the effectiveness and
                 efficiency of the algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2012:SPD,
  author =       "Lingkun Wu and Xiaokui Xiao and Dingxiong Deng
                 and Gao Cong and Andy Diwen Zhu and Shuigeng
                 Zhou",
  title =        "Shortest path and distance queries on road
                 networks: an experimental evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "406--417",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Computing the shortest path between two given
                 locations in a road network is an important
                 problem that finds applications in various map
                 services and commercial navigation products. The
                 state-of-the-art solutions for the problem can be
                 divided into two categories:
                 spatial-coherence-based methods and
                 vertex-importance-based approaches. The two
                 categories of techniques, however, have not been
                 compared systematically under the same
                 experimental framework, as they were developed
                 from two independent lines of research that do not
                 refer to each other. This renders it difficult for
                 a practitioner to decide which technique should be
                 adopted for a specific application. Furthermore,
                 the experimental evaluation of the existing
                 techniques, as presented in previous work, falls
                 short in several aspects. Some methods were tested
                 only on small road networks with up to one hundred
                 thousand vertices; some approaches were evaluated
                 using distance queries (instead of shortest path
                 queries), namely, queries that ask only for the
                 length of the shortest path; a state-of-the-art
                 technique was examined based on a faulty
                 implementation that led to incorrect query
                 results. To address the above issues, this paper
                 presents a comprehensive comparison of the most
                 advanced spatial-coherence-based and
                 vertex-importance-based approaches. Using a
                 variety of real road networks with up to twenty
                 million vertices, we evaluated each technique in
                 terms of its preprocessing time, space
                 consumption, and query efficiency (for both
                 shortest path and distance queries). Our
                 experimental results reveal the characteristics of
                 different techniques, based on which we provide
                 guidelines on selecting appropriate methods for
                 various scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Erdos:2012:FPP,
  author =       "D{\'o}ra Erd{\H{o}}s and Vatche Ishakian and
                 Andrei Lapets and Evimaria Terzi and Azer
                 Bestavros",
  title =        "The filter-placement problem and its
                 application to minimizing information
                 multiplicity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "418--429",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In many information networks, data items --- such
                 as updates in social networks, news flowing
                 through interconnected RSS feeds and blogs,
                 measurements in sensor networks, route updates in
                 ad-hoc networks --- propagate in an uncoordinated
                 manner: nodes often relay information they receive
                 to neighbors, independent of whether or not these
                 neighbors received the same information from other
                 sources. This uncoordinated data dissemination may
                 result in significant, yet unnecessary
                 communication and processing overheads, ultimately
                 reducing the utility of information networks. To
                 alleviate the negative impacts of this information
                 multiplicity phenomenon, we propose that a subset
                 of nodes (selected at key positions in the
                 network) carry out additional information
                 filtering functionality. Thus, nodes are
                 responsible for the removal (or significant
                 reduction) of the redundant data items relayed
                 through them. We refer to such nodes as filters.
                 We formally define the Filter Placement problem as
                 a combinatorial optimization problem, and study
                 its computational complexity for different types
                 of graphs. We also present polynomial-time
                 approximation algorithms and scalable heuristics
                 for the problem. Our experimental results, which
                 we obtained through extensive simulations on
                 synthetic and real-world information flow
                 networks, suggest that in many settings a
                 relatively small number of filters are fairly
                 effective in removing a large fraction of
                 redundant information.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Satuluri:2012:BLS,
  author =       "Venu Satuluri and Srinivasan Parthasarathy",
  title =        "{Bayesian} locality sensitive hashing for
                 fast similarity search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "430--441",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a collection of objects and an associated
                 similarity measure, the all-pairs similarity
                 search problem asks us to find all pairs of
                 objects with similarity greater than a certain
                 user-specified threshold. Locality-sensitive
                 hashing (LSH) based methods have become a very
                 popular approach for this problem. However, most
                 such methods only use LSH for the first phase of
                 similarity search --- i.e. efficient indexing for
                 candidate generation. In this paper, we present
                 BayesLSH, a principled Bayesian algorithm for the
                 subsequent phase of similarity search ---
                 performing candidate pruning and similarity
                 estimation using LSH. A simpler variant,
                 BayesLSH-Lite, which calculates similarities
                 exactly, is also presented. Our algorithms are
                 able to quickly prune away a large majority of the
                 false positive candidate pairs, leading to
                 significant speedups over baseline approaches. For
                 BayesLSH, we also provide probabilistic guarantees
                 on the quality of the output, both in terms of
                 accuracy and recall. Finally, the quality of
                 BayesLSH's output can be easily tuned and does not
                 require any manual setting of the number of hashes
                 to use for similarity estimation, unlike standard
                 approaches. For two state-of-the-art candidate
                 generation algorithms, AllPairs and LSH, BayesLSH
                 enables significant speedups, typically in the
                 range 2x-20x for a wide variety of datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fujiwara:2012:FET,
  author =       "Yasuhiro Fujiwara and Makoto Nakatsuji and Makoto
                 Onizuka and Masaru Kitsuregawa",
  title =        "Fast and exact top-$k$ search for random walk with
                 restart",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "442--453",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs are fundamental data structures and have been
                 employed for centuries to model real-world systems and
                 phenomena. Random walk with restart (RWR) provides a
                 good proximity score between two nodes in a graph, and
                 it has been successfully used in many applications such
                 as automatic image captioning, recommender systems, and
                 link prediction. The goal of this work is to find nodes
                 that have top-$k$ highest proximities for a given node.
                 Previous approaches to this problem find nodes
                 efficiently at the expense of exactness. The main
                 motivation of this paper is to answer, in the
                 affirmative, the question, `Is it possible to improve
                 the search time without sacrificing the exactness?'.
                 Our solution, K-dash, is based on two ideas: (1) It
                 computes the proximity of a selected node efficiently
                 by sparse matrices, and (2) It skips unnecessary
                 proximity computations when searching for the top-$k$
                 nodes. Theoretical analyses show that K-dash guarantees
                 result exactness. We perform comprehensive experiments
                 to verify the efficiency of K-dash. The results show
                 that K-dash can find top-$k$ nodes significantly faster
                 than the previous approaches while it guarantees
                 exactness.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bahmani:2012:DSS,
  author =       "Bahman Bahmani and Ravi Kumar and Sergei
                 Vassilvitskii",
  title =        "Densest subgraph in streaming and {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "454--465",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of finding locally dense components of a
                 graph is an important primitive in data analysis, with
                 wide-ranging applications from community mining to spam
                 detection and the discovery of biological network
                 modules. In this paper we present new algorithms for
                 finding the densest subgraph in the streaming model.
                 For any $ \epsilon > 0 $, our algorithms make $
                 O(\log_{1 + \epsilon } n) $ passes over the input and
                 find a subgraph whose density is guaranteed to be
                 within a factor $ 2 (1 + \epsilon) $ of the optimum.
                 Our algorithms are also easily parallelizable and we
                 illustrate this by realizing them in the MapReduce
                 model. In addition we perform extensive experimental
                 evaluation on massive real-world graphs showing the
                 performance and scalability of our algorithms in
                 practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Silva:2012:MAS,
  author =       "Arlei Silva and Wagner {Meira, Jr.} and Mohammed J.
                 Zaki",
  title =        "Mining attribute-structure correlated patterns in
                 large attributed graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "466--477",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this work, we study the correlation between
                 attribute sets and the occurrence of dense subgraphs in
                 large attributed graphs, a task we call structural
                 correlation pattern mining. A structural correlation
                 pattern is a dense subgraph induced by a particular
                 attribute set. Existing methods are not able to extract
                 relevant knowledge regarding how vertex attributes
                 interact with dense subgraphs. Structural correlation
                 pattern mining combines aspects of frequent itemset and
                 quasi-clique mining problems. We propose statistical
                 significance measures that compare the structural
                 correlation of attribute sets against their expected
                 values using null models. Moreover, we evaluate the
                 interestingness of structural correlation patterns in
                 terms of size and density. An efficient algorithm that
                 combines search and pruning strategies in the
                 identification of the most relevant structural
                 correlation patterns is presented. We apply our method
                 for the analysis of three real-world attributed graphs:
                 a collaboration, a music, and a citation network,
                 verifying that it provides valuable knowledge in a
                 feasible time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schnaitter:2012:SAI,
  author =       "Karl Schnaitter and Neoklis Polyzotis",
  title =        "Semi-automatic index tuning: keeping {DBAs} in the
                 loop",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "478--489",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To obtain a high level of system performance, a
                 database administrator (DBA) must choose a set of
                 indices that is appropriate for the workload. The
                 system can aid in this challenging task by providing
                 recommendations for the index configuration. We propose
                 a new index recommendation technique, termed
                 semi-automatic tuning, that keeps the DBA ``in the
                 loop'' by generating recommendations that use feedback
                 about the DBA's preferences. The technique also works
                 online, which avoids the limitations of commercial
                 tools that require the workload to be known in advance.
                 The foundation of our approach is the Work Function
                 Algorithm, which can solve a wide variety of online
                 optimization problems with strong competitive
                 guarantees. We present an experimental analysis that
                 validates the benefits of semi-automatic tuning in a
                 wide variety of conditions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fink:2012:APD,
  author =       "Robert Fink and Larisa Han and Dan Olteanu",
  title =        "Aggregation in probabilistic databases via knowledge
                 compilation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "5",
  pages =        "490--501",
  month =        jan,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:13 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents a query evaluation technique for
                 positive relational algebra queries with aggregates on
                 a representation system for probabilistic data based on
                 the algebraic structures of semiring and semimodule.
                 The core of our evaluation technique is a procedure
                 that compiles semimodule and semiring expressions into
                 so-called decomposition trees, for which the
                 computation of the probability distribution can be done
                 in time linear in the product of the sizes of the
                 probability distributions represented by its nodes. We
                 give syntactic characterisations of tractable queries
                 with aggregates by exploiting the connection between
                 query tractability and polynomial-time decomposition
                 trees. A prototype of the technique is incorporated in
                 the probabilistic database engine SPROUT. We report on
                 performance experiments with custom datasets and TPC-H
                 data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Halim:2012:SDC,
  author =       "Felix Halim and Stratos Idreos and Panagiotis Karras
                 and Roland H. C. Yap",
  title =        "Stochastic database cracking: towards robust adaptive
                 indexing in main-memory column-stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "502--513",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern business applications and scientific databases
                 call for inherently dynamic data storage environments.
                 Such environments are characterized by two challenging
                 features: (a) they have little idle system time to
                 devote on physical design; and (b) there is little, if
                 any, a priori workload knowledge, while the query and
                 data workload keeps changing dynamically. In such
                 environments, traditional approaches to index building
                 and maintenance cannot apply. Database cracking has
                 been proposed as a solution that allows on-the-fly
                 physical data reorganization, as a collateral effect of
                 query processing. Cracking aims to continuously and
                 automatically adapt indexes to the workload at hand,
                 without human intervention. Indexes are built
                 incrementally, adaptively, and on demand. Nevertheless,
                 as we show, existing adaptive indexing methods fail to
                 deliver workload-robustness; they perform much better
                 with random workloads than with others. This frailty
                 derives from the inelasticity with which these
                 approaches interpret each query as a hint on how data
                 should be stored. Current cracking schemes blindly
                 reorganize the data within each query's range, even if
                 that results into successive expensive operations with
                 minimal indexing benefit. In this paper, we introduce
                 stochastic cracking, a significantly more resilient
                 approach to adaptive indexing. Stochastic cracking also
                 uses each query as a hint on how to reorganize data,
                 but not blindly so; it gains resilience and avoids
                 performance bottlenecks by deliberately applying
                 certain arbitrary choices in its decision-making.
                 Thereby, we bring adaptive indexing forward to a mature
                 formulation that confers the workload-robustness
                 previous approaches lacked. Our extensive experimental
                 study verifies that stochastic cracking maintains the
                 desired properties of original database cracking while
                 at the same time it performs well with diverse
                 realistic workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2012:AMA,
  author =       "Chao Li and Gerome Miklau",
  title =        "An adaptive mechanism for accurate query answering
                 under differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "514--525",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose a novel mechanism for answering sets of
                 counting queries under differential privacy. Given a
                 workload of counting queries, the mechanism
                 automatically selects a different set of ``strategy''
                 queries to answer privately, using those answers to
                 derive answers to the workload. The main algorithm
                 proposed in this paper approximates the optimal
                 strategy for any workload of linear counting queries.
                 With no cost to the privacy guarantee, the mechanism
                 improves significantly on prior approaches and achieves
                 near-optimal error for many workloads, when applied
                 under $ (\epsilon, \delta)$-differential privacy. The
                 result is an adaptive mechanism which can help users
                 achieve good utility without requiring that they reason
                 carefully about the best formulation of their task.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Giannikis:2012:SKO,
  author =       "Georgios Giannikis and Gustavo Alonso and Donald
                 Kossmann",
  title =        "{SharedDB}: killing one thousand queries with one
                 stone",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "526--537",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional database systems are built around the
                 query-at-a-time model. This approach tries to optimize
                 performance in a best-effort way. Unfortunately, best
                 effort is not good enough for many modern applications.
                 These applications require response time guarantees in
                 high load situations. This paper describes the design
                 of a new database architecture that is based on
                 batching queries and shared computation across possibly
                 hundreds of concurrent queries and updates. Performance
                 experiments with the TPC-W benchmark show that the
                 performance of our implementation, SharedDB, is indeed
                 robust across a wide range of dynamic workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Selke:2012:PBC,
  author =       "Joachim Selke and Christoph Lofi and Wolf-Tilo Balke",
  title =        "Pushing the boundaries of crowd-enabled databases with
                 query-driven schema expansion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "538--549",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "By incorporating human workers into the query
                 execution process crowd-enabled databases facilitate
                 intelligent, social capabilities like completing
                 missing data at query time or performing cognitive
                 operators. But despite all their flexibility,
                 crowd-enabled databases still maintain rigid schemas.
                 In this paper, we extend crowd-enabled databases by
                 flexible query-driven schema expansion, allowing the
                 addition of new attributes to the database at query
                 time. However, the number of crowd-sourced mini-tasks
                 to fill in missing values may often be prohibitively
                 large and the resulting data quality is doubtful.
                 Instead of simple crowd-sourcing to obtain all values
                 individually, we leverage the user-generated data found
                 in the Social Web: By exploiting user ratings we build
                 perceptual spaces, i.e., highly-compressed
                 representations of opinions, impressions, and
                 perceptions of large numbers of users. Using few
                 training samples obtained by expert crowd sourcing, we
                 then can extract all missing data automatically from
                 the perceptual space with high quality and at low
                 costs. Extensive experiments show that our approach can
                 boost both performance and quality of crowd-enabled
                 databases, while also providing the flexibility to
                 expand schemas in a query-driven fashion.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhao:2012:BAD,
  author =       "Bo Zhao and Benjamin I. P. Rubinstein and Jim Gemmell
                 and Jiawei Han",
  title =        "A {Bayesian} approach to discovering truth from
                 conflicting sources for data integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "550--561",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In practical data integration systems, it is common
                 for the data sources being integrated to provide
                 conflicting information about the same entity.
                 Consequently, a major challenge for data integration is
                 to derive the most complete and accurate integrated
                 records from diverse and sometimes conflicting sources.
                 We term this challenge the truth finding problem. We
                 observe that some sources are generally more reliable
                 than others, and therefore a good model of source
                 quality is the key to solving the truth finding
                 problem. In this work, we propose a probabilistic
                 graphical model that can automatically infer true
                 records and source quality without any supervision. In
                 contrast to previous methods, our principled approach
                 leverages a generative process of two types of errors
                 (false positive and false negative) by modeling two
                 different aspects of source quality. In so doing, ours
                 is also the first approach designed to merge
                 multi-valued attribute types. Our method is scalable,
                 due to an efficient sampling-based inference algorithm
                 that needs very few iterations in practice and enjoys
                 linear time complexity, with an even faster incremental
                 variant. Experiments on two real world datasets show
                 that our new method outperforms existing
                 state-of-the-art approaches to the truth finding
                 problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Upadhyaya:2012:HPS,
  author =       "Prasang Upadhyaya and Magdalena Balazinska and Dan
                 Suciu",
  title =        "How to price shared optimizations in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "562--573",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data-management-as-a-service systems are increasingly
                 being used in collaborative settings, where multiple
                 users access common datasets. Cloud providers have the
                 choice to implement various optimizations, such as
                 indexing or materialized views, to accelerate queries
                 over these datasets. Each optimization carries a cost
                 and may benefit multiple users. This creates a major
                 challenge: how to select which optimizations to perform
                 and how to share their cost among users. The problem is
                 especially challenging when users are selfish and will
                 only report their true values for different
                 optimizations if doing so maximizes their utility. In
                 this paper, we present a new approach for selecting and
                 pricing shared optimizations by using Mechanism Design.
                 We first show how to apply the Shapley Value Mechanism
                 to the simple case of selecting and pricing additive
                 optimizations, assuming an offline game where all users
                 access the service for the same time-period. Second, we
                 extend the approach to online scenarios where users
                 come and go. Finally, we consider the case of
                 substitutive optimizations. We show analytically that
                 our mechanisms induce truthfulness and recover the
                 optimization costs. We also show experimentally that
                 our mechanisms yield higher utility than the
                 state-of-the-art approach based on regret
                 accumulation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Angel:2012:DSM,
  author =       "Albert Angel and Nikos Sarkas and Nick Koudas and
                 Divesh Srivastava",
  title =        "Dense subgraph maintenance under streaming edge weight
                 updates for real-time story identification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "574--585",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent years have witnessed an unprecedented
                 proliferation of social media. People around the globe
                 author, every day, millions of blog posts, micro-blog
                 posts, social network status updates, etc. This rich
                 stream of information can be used to identify, on an
                 ongoing basis, emerging stories, and events that
                 capture popular attention. Stories can be identified
                 via groups of tightly-coupled real-world entities,
                 namely the people, locations, products, etc., that are
                 involved in the story. The sheer scale, and rapid
                 evolution of the data involved necessitate highly
                 efficient techniques for identifying important stories
                 at every point of time. The main challenge in real-time
                 story identification is the maintenance of dense
                 subgraphs (corresponding to groups of tightly-coupled
                 entities) under streaming edge weight updates
                 (resulting from a stream of user-generated content).
                 This is the first work to study the efficient
                 maintenance of dense subgraphs under such streaming
                 edge weight updates. For a wide range of definitions of
                 density, we derive theoretical results regarding the
                 magnitude of change that a single edge weight update
                 can cause. Based on these, we propose a novel
                 algorithm, DynDens, which outperforms adaptations of
                 existing techniques to this setting, and yields
                 meaningful results. Our approach is validated by a
                 thorough experimental evaluation on large-scale real
                 and synthetic datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elghandour:2012:RRR,
  author =       "Iman Elghandour and Ashraf Aboulnaga",
  title =        "{ReStore}: reusing results of {MapReduce} jobs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "6",
  pages =        "586--597",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 24 07:52:15 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analyzing large scale data has emerged as an important
                 activity for many organizations in the past few years.
                 This large scale data analysis is facilitated by the
                 MapReduce programming and execution model and its
                 implementations, most notably Hadoop. Users of
                 MapReduce often have analysis tasks that are too
                 complex to express as individual MapReduce jobs.
                 Instead, they use high-level query languages such as
                 Pig, Hive, or Jaql to express their complex tasks. The
                 compilers of these languages translate queries into
                 workflows of MapReduce jobs. Each job in these
                 workflows reads its input from the distributed file
                 system used by the MapReduce system and produces output
                 that is stored in this distributed file system and read
                 as input by the next job in the workflow. The current
                 practice is to delete these intermediate results from
                 the distributed file system at the end of executing the
                 workflow. One way to improve the performance of
                 workflows of MapReduce jobs is to keep these
                 intermediate results and reuse them for future
                 workflows submitted to the system. In this paper, we
                 present ReStore, a system that manages the storage and
                 reuse of such intermediate results. ReStore can reuse
                 the output of whole MapReduce jobs that are part of a
                 workflow, and it can also create additional reuse
                 opportunities by materializing and storing the output
                 of query execution operators that are executed within a
                 MapReduce job. We have implemented ReStore as an
                 extension to the Pig dataflow system on top of Hadoop,
                 and we experimentally demonstrate significant speedups
                 on queries from the PigMix benchmark.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khoussainova:2012:PDM,
  author =       "Nodira Khoussainova and Magdalena Balazinska and Dan
                 Suciu",
  title =        "{PerfXplain}: debugging {MapReduce} job performance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "598--609",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While users today have access to many tools that
                 assist in performing large scale data analysis tasks,
                 understanding the performance characteristics of their
                 parallel computations, such as MapReduce jobs, remains
                 difficult. We present PerfXplain, a system that enables
                 users to ask questions about the relative performances
                 (i.e., runtimes) of pairs of MapReduce jobs. PerfXplain
                 provides a new query language for articulating
                 performance queries and an algorithm for generating
                 explanations from a log of past MapReduce job
                 executions. We formally define the notion of an
                 explanation together with three metrics, relevance,
                 precision, and generality, that measure explanation
                 quality. We present the explanation-generation
                 algorithm based on techniques related to decision-tree
                 building. We evaluate the approach on a log of past
                 executions on Amazon EC2, and show that our approach
                 can generate quality explanations, outperforming two
                 na{\"\i}ve explanation-generation methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gullo:2012:UCB,
  author =       "Francesco Gullo and Andrea Tagarelli",
  title =        "Uncertain centroid based partitional clustering of
                 uncertain data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "610--621",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Clustering uncertain data has emerged as a challenging
                 task in uncertain data management and mining. Thanks to
                 a computational complexity advantage over other
                 clustering paradigms, partitional clustering has been
                 particularly studied and a number of algorithms have
                 been developed. While existing proposals differ mainly
                 in the notions of cluster centroid and clustering
                 objective function, little attention has been given to
                 an analysis of their characteristics and limits. In
                 this work, we theoretically investigate major existing
                 methods of partitional clustering, and alternatively
                 propose a well-founded approach to clustering uncertain
                 data based on a novel notion of cluster centroid. A
                 cluster centroid is seen as an uncertain object defined
                 in terms of a random variable whose realizations are
                 derived based on all deterministic representations of
                 the objects to be clustered. As demonstrated
                 theoretically and experimentally, this allows for
                 better representing a cluster of uncertain objects,
                 thus supporting a consistently improved clustering
                 performance while maintaining comparable efficiency
                 with existing partitional clustering algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bahmani:2012:SM,
  author =       "Bahman Bahmani and Benjamin Moseley and Andrea Vattani
                 and Ravi Kumar and Sergei Vassilvitskii",
  title =        "Scalable $k$-means++",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "622--633",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Over half a century old and showing no signs of aging,
                 $k$-means remains one of the most popular data
                 processing algorithms. As is well-known, a proper
                 initialization of $k$-means is crucial for obtaining a
                 good final solution. The recently proposed $k$-means++
                 initialization algorithm achieves this, obtaining an
                 initial set of centers that is provably close to the
                 optimum solution. A major downside of the $k$-means++
                 is its inherent sequential nature, which limits its
                 applicability to massive data: one must make $k$ passes
                 over the data to find a good initial set of centers. In
                 this work we show how to drastically reduce the number
                 of passes needed to obtain, in parallel, a good
                 initialization. This is unlike prevailing efforts on
                 parallelizing $k$-means that have mostly focused on the
                 post-initialization phases of $k$-means. We prove that
                  our proposed initialization algorithm $k$-means$||$
                 obtains a nearly optimal solution after a logarithmic
                 number of passes, and then show that in practice a
                 constant number of passes suffices. Experimental
                 evaluation on real-world large-scale data demonstrates
                  that $k$-means$||$ outperforms $k$-means++ in both
                 sequential and parallel settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Benedikt:2012:QSA,
  author =       "Michael Benedikt and Pierre Bourhis and Clemens Ley",
  title =        "Querying schemas with access restrictions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "634--645",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study verification of systems whose transitions
                 consist of accesses to a Web-based data-source. An
                 access is a lookup on a relation within a relational
                 database, fixing values for a set of positions in the
                 relation. For example, a transition can represent
                 access to a Web form, where the user is restricted to
                 filling in values for a particular set of fields. We
                 look at verifying properties of a schema describing the
                 possible accesses of such a system. We present a
                 language where one can describe the properties of an
                 access path, and also specify additional restrictions
                 on accesses that are enforced by the schema. Our main
                 property language, AccLTL, is based on a first-order
                 extension of linear-time temporal logic, interpreting
                 access paths as sequences of relational structures. We
                 also present a lower-level automaton model, A-automata,
                 which AccLTL specifications can compile into. We show
                 that AccLTL and A-automata can express static analysis
                 problems related to ``querying with limited access
                 patterns'' that have been studied in the database
                 literature in the past, such as whether an access is
                 relevant to answering a query, and whether two queries
                 are equivalent in the accessible data they can return.
                 We prove decidability and complexity results for
                 several restrictions and variants of AccLTL, and
                 explain which properties of paths can be expressed in
                 each restriction.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Graefe:2012:DDR,
  author =       "Goetz Graefe and Harumi Kuno",
  title =        "Definition, detection, and recovery of single-page
                 failures, a fourth class of database failures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "646--655",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The three traditional failure classes are system,
                 media, and transaction failures. Sometimes, however,
                 modern storage exhibits failures that differ from all
                 of those. In order to capture and describe such cases,
                 single-page failures are introduced as a fourth failure
                 class. This class encompasses all failures to read a
                 data page correctly and with plausible contents despite
                 all correction attempts in lower system levels.
                 Efficient recovery seems to require a new data
                 structure called the page recovery index. Its
                 transactional maintenance can be accomplished writing
                 the same number of log records as today's efficient
                 implementations of logging and recovery. Detection and
                 recovery of a single-page failure can be sufficiently
                 fast that the affected data access is merely delayed,
                 without the need to abort the transaction.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Graefe:2012:CCA,
  author =       "Goetz Graefe and Felix Halim and Stratos Idreos and
                 Harumi Kuno and Stefan Manegold",
  title =        "Concurrency control for adaptive indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "656--667",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Adaptive indexing initializes and optimizes indexes
                 incrementally, as a side effect of query processing.
                 The goal is to achieve the benefits of indexes while
                 hiding or minimizing the costs of index creation.
                 However, index-optimizing side effects seem to turn
                 read-only queries into update transactions that might,
                 for example, create lock contention. This paper studies
                 concurrency control in the context of adaptive
                 indexing. We show that the design and implementation of
                 adaptive indexing rigorously separates index structures
                 from index contents; this relaxes the constraints and
                 requirements during adaptive indexing compared to those
                 of traditional index updates. Our design adapts to the
                 fact that an adaptive index is refined continuously,
                 and exploits any concurrency opportunities in a dynamic
                 way. A detailed experimental analysis demonstrates that
                 (a) adaptive indexing maintains its adaptive properties
                 even when running concurrent queries, (b) adaptive
                 indexing can exploit the opportunity for parallelism
                 due to concurrent queries, (c) the number of
                 concurrency conflicts and any concurrency
                 administration overheads follow an adaptive behavior,
                 decreasing as the workload evolves and adapting to the
                 workload needs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zeng:2012:CSB,
  author =       "Qiang Zeng and Hai Zhuge",
  title =        "Comments on {``Stack-based Algorithms for Pattern
                 Matching on DAGs''}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "668--679",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The paper ``Stack-based Algorithms for Pattern
                 Matching on DAGs'' generalizes the classical holistic
                 twig join algorithms and proposes PathStackD,
                 TwigStackD and DagStackD to respectively evaluate path,
                 twig and DAG pattern queries on directed acyclic
                 graphs. In this paper, we investigate the major results
                 of that paper, pointing out several discrepancies and
                 proposing solutions to resolving them. We show that the
                 original algorithms do not find particular types of
                 query solutions that are common in practice. We also
                 analyze the effect of an underlying assumption on the
                 correctness of the algorithms and discuss the
                 pre-filtering process that the original work proposes
                 to prune redundant nodes. Our experimental study on
                 both real and synthetic data substantiates our
                 conclusions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dalvi:2012:ASD,
  author =       "Nilesh Dalvi and Ashwin Machanavajjhala and Bo Pang",
  title =        "An analysis of structured data on the web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "7",
  pages =        "680--691",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:09 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we analyze the nature and distribution
                 of structured data on the Web. Web-scale information
                 extraction, or the problem of creating structured
                 tables using extraction from the entire web, is
                 gathering lots of research interest. We perform a study
                 to understand and quantify the value of Web-scale
                 extraction, and how structured information is
                 distributed amongst top aggregator websites and tail
                 sites for various interesting domains. We believe this
                 is the first study of its kind, and gives us new
                 insights for information extraction over the Web.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mouratidis:2012:SPC,
  author =       "Kyriakos Mouratidis and Man Lung Yiu",
  title =        "Shortest path computation with no information
                 leakage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "8",
  pages =        "692--703",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:10 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Shortest path computation is one of the most common
                 queries in location-based services (LBSs). Although
                 particularly useful, such queries raise serious privacy
                 concerns. Exposing to a (potentially untrusted) LBS the
                 client's position and her destination may reveal
                 personal information, such as social habits, health
                 condition, shopping preferences, lifestyle choices,
                 etc. The only existing method for privacy-preserving
                 shortest path computation follows the obfuscation
                 paradigm; it prevents the LBS from inferring the source
                 and destination of the query with a probability higher
                 than a threshold. This implies, however, that the LBS
                 still deduces some information (albeit not exact) about
                 the client's location and her destination. In this
                 paper we aim at strong privacy, where the adversary
                 learns nothing about the shortest path query. We
                 achieve this via established private information
                 retrieval techniques, which we treat as black-box
                 building blocks. Experiments on real, large-scale road
                 networks assess the practicality of our schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Metwally:2012:VSJ,
  author =       "Ahmed Metwally and Christos Faloutsos",
  title =        "{V-SMART-join}: a scalable {MapReduce} framework for
                 all-pair similarity joins of multisets and vectors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "8",
  pages =        "704--715",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:10 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This work proposes V-SMART-Join, a scalable
                 MapReduce-based framework for discovering all pairs of
                 similar entities. The V-SMART-Join framework is
                 applicable to sets, multisets, and vectors.
                 V-SMART-Join is motivated by the observed skew in the
                 underlying distributions of Internet traffic, and is a
                 family of 2-stage algorithms, where the first stage
                 computes and joins the partial results, and the second
                 stage computes the similarity exactly for all candidate
                 pairs. The V-SMART-Join algorithms are very efficient
                 and scalable in the number of entities, as well as
                 their cardinalities. They were up to 30 times faster
                 than the state of the art algorithm, VCL, when compared
                 on a real dataset of a small size. We also established
                 the scalability of the proposed algorithms by running
                 them on a dataset of a realistic size, on which VCL
                 never succeeded to finish. Experiments were run using
                 real datasets of IPs and cookies, where each IP is
                 represented as a multiset of cookies, and the goal is
                 to discover similar IPs to identify Internet proxies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Low:2012:DGF,
  author =       "Yucheng Low and Danny Bickson and Joseph Gonzalez and
                 Carlos Guestrin and Aapo Kyrola and Joseph M.
                 Hellerstein",
  title =        "{Distributed GraphLab}: a framework for machine
                 learning and data mining in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "8",
  pages =        "716--727",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:10 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While high-level data parallel frameworks, like
                 MapReduce, simplify the design and implementation of
                 large-scale data processing systems, they do not
                 naturally or efficiently support many important data
                 mining and machine learning algorithms and can lead to
                 inefficient learning systems. To help fill this
                 critical void, we introduced the GraphLab abstraction
                 which naturally expresses asynchronous, dynamic,
                 graph-parallel computation while ensuring data
                 consistency and achieving a high degree of parallel
                 performance in the shared-memory setting. In this
                 paper, we extend the GraphLab framework to the
                 substantially more challenging distributed setting
                 while preserving strong data consistency guarantees. We
                 develop graph based extensions to pipelined locking and
                 data versioning to reduce network congestion and
                 mitigate the effect of network latency. We also
                 introduce fault tolerance to the GraphLab abstraction
                 using the classic Chandy-Lamport snapshot algorithm and
                 demonstrate how it can be easily implemented by
                 exploiting the GraphLab abstraction itself. Finally, we
                 evaluate our distributed implementation of the GraphLab
                 abstraction on a large Amazon EC2 deployment and show
                  1--2 orders of magnitude performance gains over
                 Hadoop-based implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zeng:2012:ALO,
  author =       "Qiang Zeng and Xiaorui Jiang and Hai Zhuge",
  title =        "Adding logical operators to tree pattern queries on
                 graph-structured data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "8",
  pages =        "728--739",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:10 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As data are increasingly modeled as graphs for
                 expressing complex relationships, the tree pattern
                 query on graph-structured data becomes an important
                 type of queries in real-world applications. Most
                 practical query languages, such as XQuery and SPARQL,
                 support logical expressions using logical-AND/OR/NOT
                 operators to define structural constraints of tree
                 patterns. In this paper, (1) we propose generalized
                 tree pattern queries (GTPQs) over graph-structured
                 data, which fully support propositional logic of
                 structural constraints. (2) We make a thorough study of
                 fundamental problems including satisfiability,
                 containment and minimization, and analyze the
                 computational complexity and the decision procedures of
                 these problems. (3) We propose a compact graph
                 representation of intermediate results and a pruning
                 approach to reduce the size of intermediate results and
                 the number of join operations --- two factors that
                 often impair the efficiency of traditional algorithms
                 for evaluating tree pattern queries. (4) We present an
                 efficient algorithm for evaluating GTPQs using 3-hop as
                 the underlying reachability index. (5) Experiments on
                 both real-life and synthetic data sets demonstrate the
                 effectiveness and efficiency of our algorithm, from
                 several times to orders of magnitude faster than
                 state-of-the-art algorithms in terms of evaluation
                 time, even for traditional tree pattern queries with
                 only conjunctive operations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Singh:2012:LSS,
  author =       "Rishabh Singh and Sumit Gulwani",
  title =        "Learning semantic string transformations from
                 examples",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "8",
  pages =        "740--751",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:10 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We address the problem of performing semantic
                 transformations on strings, which may represent a
                 variety of data types (or their combination) such as a
                 column in a relational table, time, date, currency,
                 etc. Unlike syntactic transformations, which are based
                 on regular expressions and which interpret a string as
                 a sequence of characters, semantic transformations
                 additionally require exploiting the semantics of the
                 data type represented by the string, which may be
                 encoded as a database of relational tables. Manually
                 performing such transformations on a large collection
                 of strings is error prone and cumbersome, while
                 programmatic solutions are beyond the skill-set of
                 end-users. We present a programming by example
                 technology that allows end-users to automate such
                 repetitive tasks. We describe an expressive
                 transformation language for semantic manipulation that
                 combines table lookup operations and syntactic
                 manipulations. We then present a synthesis algorithm
                 that can learn all transformations in the language that
                 are consistent with the user-provided set of
                 input-output examples. We have implemented this
                 technology as an add-in for the Microsoft Excel
                 Spreadsheet system and have evaluated it successfully
                 over several benchmarks picked from various Excel
                 help-forums.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2012:CDD,
  author =       "Changbin Liu and Lu Ren and Boon Thau Loo and Yun Mao
                 and Prithwish Basu",
  title =        "{Cologne}: a declarative distributed constraint
                 optimization platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "8",
  pages =        "752--763",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:10 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents Cologne, a declarative
                 optimization platform that enables constraint
                 optimization problems (COPs) to be declaratively
                 specified and incrementally executed in distributed
                 systems. Cologne integrates a declarative networking
                 engine with an off-the-shelf constraint solver. We have
                 developed the Colog language that combines distributed
                 Datalog used in declarative networking with language
                 constructs for specifying goals and constraints used in
                 COPs. Cologne uses novel query processing strategies
                 for processing Colog programs, by combining the use of
                 bottom-up distributed Datalog evaluation with top-down
                 goal-oriented constraint solving. Using case studies
                 based on cloud and wireless network optimizations, we
                 demonstrate that Cologne (1) can flexibly support a
                 wide range of policy-based optimizations in distributed
                 systems, (2) results in orders of magnitude less code
                 compared to imperative implementations, and (3) is
                 highly efficient with low overhead and fast convergence
                 times.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2012:OBA,
  author =       "Yi Zhang and Jun Yang",
  title =        "Optimizing {I/O} for big array analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "8",
  pages =        "764--775",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:10 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big array analytics is becoming indispensable in
                 answering important scientific and business questions.
                 Most analysis tasks consist of multiple steps, each
                 making one or multiple passes over the arrays to be
                 analyzed and generating intermediate results. In the
                 big data setting, I/O optimization is a key to
                 efficient analytics. In this paper, we develop a
                 framework and techniques for capturing a broad range of
                 analysis tasks expressible in nested-loop forms,
                 representing them in a declarative way, and optimizing
                 their I/O by identifying sharing opportunities.
                 Experiment results show that our optimizer is capable
                 of finding execution plans that exploit nontrivial I/O
                 sharing opportunities with significant savings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Bailis:2012:PBS,
  author =       {Peter Bailis and Shivaram Venkataraman and Michael J.
                 Franklin and Joseph M. Hellerstein and Ion Stoica},
  title =        {Probabilistically bounded staleness for practical
                 partial quorums},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {8},
  pages =        {776--787},
  month =        apr,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:10 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Data store replication results in a fundamental
                 trade-off between operation latency and data
                 consistency. In this paper, we examine this trade-off
                 in the context of quorum-replicated data stores. Under
                 partial, or non-strict quorum replication, a data store
                 waits for responses from a subset of replicas before
                 answering a query, without guaranteeing that read and
                 write replica sets intersect. As deployed in practice,
                 these configurations provide only basic eventual
                 consistency guarantees, with no limit to the recency of
                 data returned. However, anecdotally, partial quorums
                 are often ``good enough'' for practitioners given their
                 latency benefits. In this work, we explain why partial
                 quorums are regularly acceptable in practice, analyzing
                 both the staleness of data they return and the latency
                 benefits they offer. We introduce Probabilistically
                 Bounded Staleness (PBS) consistency, which provides
                 expected bounds on staleness with respect to both
                 versions and wall clock time. We derive a closed-form
                 solution for versioned staleness as well as model
                 real-time staleness for representative Dynamo-style
                 systems under internet-scale production workloads.
                 Using PBS, we measure the latency-consistency trade-off
                 for partial quorum systems. We quantitatively
                 demonstrate how eventually consistent systems
                 frequently return consistent data within tens of
                 milliseconds while offering significant latency
                 benefits.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Sun:2012:ESM,
  author =       {Zhao Sun and Hongzhi Wang and Haixun Wang and Bin Shao
                 and Jianzhong Li},
  title =        {Efficient subgraph matching on billion node graphs},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {788--799},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {The ability to handle large scale graph data is
                 crucial to an increasing number of applications. Much
                 work has been dedicated to supporting basic graph
                 operations such as subgraph matching, reachability,
                 regular expression matching, etc. In many cases, graph
                 indices are employed to speed up query processing.
                 Typically, most indices require either super-linear
                 indexing time or super-linear indexing space.
                 Unfortunately, for very large graphs, super-linear
                 approaches are almost always infeasible. In this paper,
                 we study the problem of subgraph matching on
                 billion-node graphs. We present a novel algorithm that
                 supports efficient subgraph matching for graphs
                 deployed on a distributed memory store. Instead of
                 relying on super-linear indices, we use efficient graph
                 exploration and massive parallel computing for query
                 processing. Our experimental results demonstrate the
                 feasibility of performing subgraph matching on
                 web-scale graph data.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Yuan:2012:ESS,
  author =       {Ye Yuan and Guoren Wang and Lei Chen and Haixun Wang},
  title =        {Efficient subgraph similarity search on large
                 probabilistic graph databases},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {800--811},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Many studies have been conducted on seeking the
                 efficient solution for subgraph similarity search over
                 certain (deterministic) graphs due to its wide
                 application in many fields, including bioinformatics,
                 social network analysis, and Resource Description
                 Framework (RDF) data management. All these works assume
                 that the underlying data are certain. However, in
                 reality, graphs are often noisy and uncertain due to
                 various factors, such as errors in data extraction,
                 inconsistencies in data integration, and privacy
                 preserving purposes. Therefore, in this paper, we study
                 subgraph similarity search on large probabilistic graph
                 databases. Different from previous works assuming that
                 edges in an uncertain graph are independent of each
                 other, we study the uncertain graphs where edges'
                 occurrences are correlated. We formally prove that
                 subgraph similarity search over probabilistic graphs is
                 \#P-complete, thus, we employ a filter-and-verify
                 framework to speed up the search. In the filtering
                 phase, we develop tight lower and upper bounds of
                 subgraph similarity probability based on a
                 probabilistic matrix index, PMI. PMI is composed of
                 discriminative subgraph features associated with tight
                 lower and upper bounds of subgraph isomorphism
                 probability. Based on PMI, we can sort out a large
                 number of probabilistic graphs and maximize the pruning
                 capability. During the verification phase, we develop
                 an efficient sampling algorithm to validate the
                 remaining candidates. The efficiency of our proposed
                 solutions has been verified through extensive
                 experiments.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Wang:2012:TDM,
  author =       {Jia Wang and James Cheng},
  title =        {Truss decomposition in massive networks},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {812--823},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {The $k$-truss is a type of cohesive subgraphs proposed
                 recently for the study of networks. While the problem
                 of computing most cohesive subgraphs is NP-hard, there
                 exists a polynomial time algorithm for computing
                 $k$-truss. Compared with $k$-core which is also
                 efficient to compute, $k$-truss represents the ``core''
                 of a $k$-core that keeps the key information of, while
                 filtering out less important information from, the
                 $k$-core. However, existing algorithms for computing
                 $k$-truss are inefficient for handling today's massive
                 networks. We first improve the existing in-memory
                 algorithm for computing $k$-truss in networks of
                 moderate size. Then, we propose two I/O-efficient
                 algorithms to handle massive networks that cannot fit
                 in main memory. Our experiments on real datasets verify
                 the efficiency of our algorithms and the value of
                 $k$-truss.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Fan:2012:SST,
  author =       {Ju Fan and Guoliang Li and Lizhu Zhou and Shanshan
                 Chen and Jun Hu},
  title =        {{Seal}: spatio-textual similarity search},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {824--835},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Location-based services (LBS) have become more and
                 more ubiquitous recently. Existing methods focus on
                 finding relevant points-of-interest (POIs) based on
                 users' locations and query keywords. Nowadays, modern
                 LBS applications generate a new kind of spatio-textual
                 data, regions-of-interest (ROIs), containing
                 region-based spatial information and textual
                 description, e.g., mobile user profiles with active
                 regions and interest tags. To satisfy search
                 requirements on ROIs, we study a new research problem,
                 called spatio-textual similarity search: Given a set of
                 ROIs and a query ROI, we find the similar ROIs by
                 considering spatial overlap and textual similarity.
                 Spatio-textual similarity search has many important
                 applications, e.g., social marketing in location-aware
                 social networks. It calls for an efficient search
                 method to support large scales of spatio-textual data
                 in LBS systems. To this end, we introduce a
                 filter-and-verification framework to compute the
                 answers. In the filter step, we generate signatures for
                 the ROIs and the query, and utilize the signatures to
                 generate candidates whose signatures are similar to
                 that of the query. In the verification step, we verify
                 the candidates and identify the final answers. To
                 achieve high performance, we generate effective
                 high-quality signatures, and devise efficient filtering
                 algorithms as well as pruning techniques. Experimental
                 results on real and synthetic datasets show that our
                 method achieves high performance.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Lappas:2012:SBT,
  author =       "Theodoros Lappas and Marcos R. Vieira and Dimitrios
                 Gunopulos and Vassilis J. Tsotras",
  title =        "On the spatiotemporal burstiness of terms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "9",
  pages =        "836--847",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:11 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Thousands of documents are made available to the users
                 via the web on a daily basis. One of the most
                 extensively studied problems in the context of such
                 document streams is burst identification. Given a term
                 $t$, a burst is generally exhibited when an unusually
                 high frequency is observed for $t$. While spatial and
                 temporal burstiness have been studied individually in
                 the past, our work is the first to simultaneously track
                 and measure spatiotemporal term burstiness. In
                 addition, we use the mined burstiness information
                 toward an efficient document-search engine: given a
                 user's query of terms, our engine returns a ranked list
                 of documents discussing influential events with a
                 strong spatiotemporal impact. We demonstrate the
                 efficiency of our methods with an extensive
                 experimental evaluation on real and synthetic
                 datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Shirani-Mehr:2012:ERQ,
  author =       {Houtan Shirani-Mehr and Farnoush Banaei-Kashani and
                 Cyrus Shahabi},
  title =        {Efficient reachability query evaluation in large
                 spatiotemporal contact datasets},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {848--859},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {With the advent of reliable positioning technologies
                 and prevalence of location-based services, it is now
                 feasible to accurately study the propagation of items
                 such as infectious viruses, sensitive information
                 pieces, and malwares through a population of moving
                 objects, e.g., individuals, mobile devices, and
                 vehicles. In such application scenarios, an item passes
                 between two objects when the objects are sufficiently
                 close (i.e., when they are, so-called, in contact), and
                 hence once an item is initiated, it can penetrate the
                 object population through the evolving network of
                 contacts among objects, termed contact network. In this
                 paper, for the first time we define and study
                 reachability queries in large (i.e., disk-resident)
                 contact datasets which record the movement of a
                 (potentially large) set of objects moving in a spatial
                 environment over an extended time period. A
                 reachability query verifies whether two objects are
                 ``reachable'' through the evolving contact network
                 represented by such contact datasets. We propose two
                 contact-dataset indexes that enable efficient
                 evaluation of such queries despite the potentially
                 humongous size of the contact datasets. With the first
                 index, termed ReachGrid, at the query time only a small
                 necessary portion of the contact network which is
                 required for reachability evaluation is constructed and
                 traversed. With the second approach, termed ReachGraph,
                 we precompute reachability at different scales and
                 leverage these precalculations at the query time for
                 efficient query processing. We optimize the placement
                 of both indexes on disk to enable efficient index
                 traversal during query processing. We study the pros
                 and cons of our proposed approaches by performing
                 extensive experiments with both real and synthetic
                 data. Based on our experimental results, our proposed
                 approaches outperform existing reachability query
                 processing techniques in contact networks by 76\% on
                 average.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Nguyen:2012:BMO,
  author =       {Thi Nguyen and Zhen He and Rui Zhang and Phillip
                 Ward},
  title =        {Boosting moving object indexing through velocity
                 partitioning},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {860--871},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {There have been intense research interests in moving
                 object indexing in the past decade. However, existing
                 work did not exploit the important property of skewed
                 velocity distributions. In many real world scenarios,
                 objects travel predominantly along only a few
                 directions. Examples include vehicles on road networks,
                 flights, people walking on the streets, etc. The search
                 space for a query is heavily dependent on the velocity
                 distribution of the objects grouped in the nodes of an
                 index tree. Motivated by this observation, we propose
                 the velocity partitioning (VP) technique, which
                 exploits the skew in velocity distribution to speed up
                 query processing using moving object indexes. The VP
                 technique first identifies the ``dominant velocity axes
                 (DVAs)'' using a combination of principal components
                 analysis (PCA) and $k$-means clustering. Then, a moving
                 object index (e.g., a TPR-tree) is created based on
                 each DVA, using the DVA as an axis of the underlying
                 coordinate system. An object is maintained in the index
                 whose DVA is closest to the object's current moving
                 direction. Thus, all the objects in an index are moving
                 in a near 1-dimensional space instead of a
                 2-dimensional space. As a result, the expansion of the
                 search space with time is greatly reduced, from a
                 quadratic function of the maximum speed (of the objects
                 in the search range) to a near linear function of the
                 maximum speed. The VP technique can be applied to a
                 wide range of moving object index structures. We have
                 implemented the VP technique on two representative
                 ones, the TPR*-tree and the B$^x$-tree. Extensive
                 experiments validate that the VP technique consistently
                 improves the performance of those index structures.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Bidoit-Tollu:2012:TBD,
  author =       {Nicole Bidoit-Tollu and Dario Colazzo and Federico
                 Ulliana},
  title =        {Type-based detection of {XML} query-update
                 independence},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {872--883},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {This paper presents a novel static analysis technique
                 to detect XML query-update independence, in the
                 presence of a schema. Rather than types, our system
                 infers chains of types. Each chain represents a path
                 that can be traversed on a valid document during
                 query/update evaluation. The resulting independence
                 analysis is precise, although it raises a challenging
                 issue: recursive schemas may lead to inference of
                 infinitely many chains. A sound and complete
                 approximation technique ensuring a finite analysis in
                 any case is presented, together with an efficient
                 implementation performing the chain-based analysis in
                 polynomial space and time.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Sowell:2012:MSD,
  author =       {Benjamin Sowell and Wojciech Golab and Mehul A. Shah},
  title =        {{Minuet}: a scalable distributed multiversion
                 {B}-tree},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {884--895},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Data management systems have traditionally been
                 designed to support either long-running analytics
                 queries or short-lived transactions, but an increasing
                 number of applications need both. For example, online
                 games, socio-mobile apps, and e-commerce sites need to
                 not only maintain operational state, but also analyze
                 that data quickly to make predictions and
                 recommendations that improve user experience. In this
                 paper, we present Minuet, a distributed, main-memory
                 B-tree that supports both transactions and
                 copy-on-write snapshots for in-situ analytics. Minuet
                 uses main-memory storage to enable low-latency
                 transactional operations as well as analytics queries
                 without compromising transaction performance. In
                 addition to supporting read-only analytics queries on
                 snapshots, Minuet supports writable clones, so that
                 users can create branching versions of the data. This
                 feature can be quite useful, e.g. to support complex
                 ``what-if'' analysis or to facilitate wide-area
                 replication. Our experiments show that Minuet
                 outperforms a commercial main-memory database in many
                 ways. It scales to hundreds of cores and TBs of memory,
                 and can process hundreds of thousands of B-tree
                 operations per second while executing long-running
                 scans.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Yin:2012:CLT,
  author =       {Hongzhi Yin and Bin Cui and Jing Li and Junjie Yao and
                 Chen Chen},
  title =        {Challenging the long tail recommendation},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {9},
  pages =        {896--907},
  month =        may,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:11 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {The success of ``infinite-inventory'' retailers such
                 as Amazon.com and Netflix has been largely attributed
                 to a ``long tail'' phenomenon. Although the majority of
                 their inventory is not in high demand, these niche
                 products, unavailable at limited-inventory competitors,
                 generate a significant fraction of total revenue in
                 aggregate. In addition, tail product availability can
                 boost head sales by offering consumers the convenience
                 of ``one-stop shopping'' for both their mainstream and
                 niche tastes. However, most of existing recommender
                 systems, especially collaborative filter based methods,
                 can not recommend tail products due to the data
                 sparsity issue. It has been widely acknowledged that to
                 recommend popular products is easier yet more trivial
                 while to recommend long tail products adds more novelty
                 yet it is also a more challenging task. In this paper,
                 we propose a novel suite of graph-based algorithms for
                 the long tail recommendation. We first represent
                 user-item information with undirected edge-weighted
                 graph and investigate the theoretical foundation of
                 applying Hitting Time algorithm for long tail item
                 recommendation. To improve recommendation diversity and
                 accuracy, we extend Hitting Time and propose efficient
                 Absorbing Time algorithm to help users find their
                 favorite long tail items. Finally, we refine the
                 Absorbing Time algorithm and propose two entropy-biased
                 Absorbing Cost algorithms to distinguish the variation
                 on different user-item rating pairs, which further
                 enhances the effectiveness of long tail recommendation.
                 Empirical experiments on two real life datasets show
                 that our proposed algorithms are effective to recommend
                 long tail items and outperform state-of-the-art
                 recommendation techniques.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Pimplikar:2012:ATQ,
  author =       "Rakesh Pimplikar and Sunita Sarawagi",
  title =        "Answering table queries on the {Web} using column
                 keywords",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "908--919",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present the design of a structured search engine
                 which returns a multi-column table in response to a
                 query consisting of keywords describing each of its
                 columns. We answer such queries by exploiting the
                 millions of tables on the Web because these are much
                 richer sources of structured knowledge than free-format
                 text. However, a corpus of tables harvested from
                 arbitrary HTML web pages presents huge challenges of
                 diversity and redundancy not seen in centrally edited
                 knowledge bases. We concentrate on one concrete task in
                 this paper. Given a set of Web tables T$_1$,\ldots{},
                 T$_n$, and a query $Q$ with $q$ sets of keywords
                 Q$_1$,\ldots{}, Q$_q$, decide for each T$_i$ if it is
                 relevant to $Q$ and if so, identify the mapping between
                 the columns of T$_i$ and query columns. We represent
                 this task as a graphical model that jointly maps all
                 tables by incorporating diverse sources of clues
                 spanning matches in different parts of the table,
                 corpus-wide co-occurrence statistics, and content
                 overlap across table columns. We define a novel query
                 segmentation model for matching keywords to table
                 columns, and a robust mechanism of exploiting content
                 overlap across table columns. We design efficient
                 inference algorithms based on bipartite matching and
                 constrained graph cuts to solve the joint labeling
                 task. Experiments on a workload of 59 queries over a 25
                 million web table corpus shows significant boost in
                 accuracy over baseline IR methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Goodrich:2012:EVW,
  author =       {Michael T. Goodrich and Charalampos Papamanthou and
                 Duy Nguyen and Roberto Tamassia and Cristina Videira
                 Lopes and Olga Ohrimenko and Nikos Triandopoulos},
  title =        {Efficient verification of web-content searching
                 through authenticated web crawlers},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {5},
  number =       {10},
  pages =        {920--931},
  month =        jun,
  year =         {2012},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Tue Nov 6 16:43:13 MST 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We consider the problem of verifying the correctness
                 and completeness of the result of a keyword search. We
                 introduce the concept of an authenticated web crawler
                 and present its design and prototype implementation. An
                 authenticated web crawler is a trusted program that
                 computes a specially-crafted signature over the web
                 contents it visits. This signature enables (i) the
                 verification of common Internet queries on web pages,
                 such as conjunctive keyword searches---this guarantees
                 that the output of a conjunctive keyword search is
                 correct and complete; (ii) the verification of the
                 content returned by such Internet queries---this
                 guarantees that web data is authentic and has not been
                 maliciously altered since the computation of the
                 signature by the crawler. In our solution, the search
                 engine returns a cryptographic proof of the query
                 result. Both the proof size and the verification time
                 are proportional only to the sizes of the query
                 description and the query result, but do not depend on
                 the number or sizes of the web pages over which the
                 search is performed. As we experimentally demonstrate,
                 the prototype implementation of our system provides a
                 low communication overhead between the search engine
                 and the user, and fast verification of the returned
                 results by the user.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Blunschi:2012:SGS,
  author =       "Lukas Blunschi and Claudio Jossen and Donald Kossmann
                 and Magdalini Mori and Kurt Stockinger",
  title =        "{SODA}: generating {SQL} for business users",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "932--943",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The purpose of data warehouses is to enable business
                 analysts to make better decisions. Over the years the
                 technology has matured and data warehouses have become
                 extremely successful. As a consequence, more and more
                 data has been added to the data warehouses and their
                 schemas have become increasingly complex. These systems
                 still work great in order to generate pre-canned
                 reports. However, with their current complexity, they
                 tend to be a poor match for non tech-savvy business
                 analysts who need answers to ad-hoc queries that were
                 not anticipated. This paper describes the design,
                 implementation, and experience of the SODA system
                 (Search over DAta Warehouse). SODA bridges the gap
                 between the business needs of analysts and the
                 technical complexity of current data warehouses. SODA
                 enables a Google-like search experience for data
                 warehouses by taking keyword queries of business users
                 and automatically generating executable SQL. The key
                 idea is to use a graph pattern matching algorithm that
                 uses the metadata model of the data warehouse. Our
                 results with real data from a global player in the
                 financial services industry show that SODA produces
                 queries with high precision and recall, and makes it
                 much easier for business users to interactively explore
                 highly-complex data warehouses.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Terrovitis:2012:PPD,
  author =       "Manolis Terrovitis and Nikos Mamoulis and John
                 Liagouris and Spiros Skiadopoulos",
  title =        "Privacy preservation by disassociation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "944--955",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this work, we focus on protection against identity
                 disclosure in the publication of sparse
                 multidimensional data. Existing multidimensional
                 anonymization techniques (a) protect the privacy of
                 users either by altering the set of quasi-identifiers
                 of the original data (e.g., by generalization or
                 suppression) or by adding noise (e.g., using
                 differential privacy) and/or (b) assume a clear
                 distinction between sensitive and non-sensitive
                 information and sever the possible linkage. In many
                 real world applications the above techniques are not
                 applicable. For instance, consider web search query
                 logs. Suppressing or generalizing anonymization methods
                 would remove the most valuable information in the
                 dataset: the original query terms. Additionally, web
                 search query logs contain millions of query terms which
                 cannot be categorized as sensitive or non-sensitive
                 since a term may be sensitive for a user and
                 non-sensitive for another. Motivated by this
                 observation, we propose an anonymization technique
                 termed disassociation that preserves the original terms
                 but hides the fact that two or more different terms
                 appear in the same record. We protect the users'
                 privacy by disassociating record terms that participate
                 in identifying combinations. This way the adversary
                 cannot associate with high probability a record with a
                 rare combination of terms. To the best of our
                 knowledge, our proposal is the first to employ such a
                 technique to provide protection against identity
                 disclosure. We propose an anonymization algorithm based
                 on our approach and evaluate its performance on real
                 and synthetic datasets, comparing it against other
                 state-of-the-art methods based on generalization and
                 differential privacy.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kanagal:2012:SRS,
  author =       "Bhargav Kanagal and Amr Ahmed and Sandeep Pandey and
                 Vanja Josifovski and Jeff Yuan and Lluis Garcia-Pueyo",
  title =        "Supercharging recommender systems using taxonomies for
                 learning user purchase behavior",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "956--967",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recommender systems based on latent factor models have
                 been effectively used for understanding user interests
                 and predicting future actions. Such models work by
                 projecting the users and items into a smaller
                 dimensional space, thereby clustering similar users and
                 items together and subsequently compute similarity
                 between unknown user-item pairs. When user-item
                 interactions are sparse (sparsity problem) or when new
                 items continuously appear (cold start problem), these
                 models perform poorly. In this paper, we exploit the
                 combination of taxonomies and latent factor models to
                 mitigate these issues and improve recommendation
                 accuracy. We observe that taxonomies provide structure
                 similar to that of a latent factor model: namely, it
                 imposes human-labeled categories (clusters) over items.
                 This leads to our proposed taxonomy-aware latent factor
                 model (TF) which combines taxonomies and latent factors
                 using additive models. We develop efficient algorithms
                 to train the TF models, which scales to large number of
                 users/items and develop scalable
                 inference/recommendation algorithms by exploiting the
                 structure of the taxonomy. In addition, we extend the
                 TF model to account for the temporal dynamics of user
                 interests using high-order Markov chains. To deal with
                 large-scale data, we develop a parallel multi-core
                 implementation of our TF model. We empirically evaluate
                 the TF model for the task of predicting user purchases
                 using a real-world shopping dataset spanning more than
                 a million users and products. Our experiments
                 demonstrate the benefits of using our TF models over
                 existing approaches, in terms of both prediction
                 accuracy and running time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ahmad:2012:DHO,
  author =       "Yanif Ahmad and Oliver Kennedy and Christoph Koch and
                 Milos Nikolic",
  title =        "{DBToaster}: higher-order delta processing for
                 dynamic, frequently fresh views",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "968--979",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Applications ranging from algorithmic trading to
                 scientific data analysis require realtime analytics
                 based on views over databases that change at very high
                 rates. Such views have to be kept fresh at low
                 maintenance cost and latencies. At the same time, these
                 views have to support classical SQL, rather than window
                 semantics, to enable applications that combine current
                 with aged or historical data. In this paper, we present
                 viewlet transforms, a recursive finite differencing
                 technique applied to queries. The viewlet transform
                 materializes a query and a set of its higher-order
                 deltas as views. These views support each other's
                 incremental maintenance, leading to a reduced overall
                 view maintenance cost. The viewlet transform of a query
                 admits efficient evaluation, the elimination of certain
                 expensive query operations, and aggressive
                 parallelization. We develop viewlet transforms into a
                 workable query execution technique, present a heuristic
                 and cost-based optimization framework, and report on
                 experiments with a prototype dynamic data management
                 system that combines viewlet transforms with an
                 optimizing compilation technique. The system supports
                 tens of thousands of complete view refreshes a second
                 for a wide range of queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agarwal:2012:RTD,
  author =       "Manoj K. Agarwal and Krithi Ramamritham and Manish
                 Bhide",
  title =        "Real time discovery of dense clusters in highly
                 dynamic graphs: identifying real world events in highly
                 dynamic environments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "980--991",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Due to their real time nature, microblog streams are a
                 rich source of dynamic information, for example, about
                 emerging events. Existing techniques for discovering
                 such events from a microblog stream in real time (such
                 as Twitter trending topics), have several lacunae when
                 used for discovering emerging events; extant graph
                 based event detection techniques are not practical in
                 microblog settings due to their complexity; and
                 conventional techniques, which have been developed for
                 blogs, web-pages, etc., involving the use of keyword
                 search, are only useful for finding information about
                 known events. Hence, in this paper, we present
                 techniques to discover events that are unraveling in
                 microblog message streams in real time so that such
                 events can be reported as soon as they occur. We model
                 the problem as discovering dense clusters in highly
                 dynamic graphs. Despite many recent advances in graph
                 analysis, ours is the first technique to identify dense
                 clusters in massive and highly dynamic graphs in real
                 time. Given the characteristics of microblog streams,
                 in order to find clusters without missing any events,
                 we propose and exploit a novel graph property which we
                 call short-cycle property. Our algorithms find these
                 clusters efficiently in spite of rapid changes to the
                 microblog streams. Further we present a novel ranking
                 function to identify the important events. Besides
                 proving the correctness of our algorithms we show their
                 practical utility by evaluating them using real world
                 microblog data. These demonstrate our technique's
                 ability to discover, with high precision and recall,
                 emerging events in high intensity data streams in real
                 time. Many recent web applications create data which
                 can be represented as massive dynamic graphs. Our
                 technique can be easily extended to discover, in real
                 time, interesting patterns in such graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papapetrou:2012:SBQ,
  author =       "Odysseas Papapetrou and Minos Garofalakis and Antonios
                 Deligiannakis",
  title =        "Sketch-based querying of distributed sliding-window
                 data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "992--1003",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While traditional data-management systems focus on
                 evaluating single, ad-hoc queries over static data sets
                 in a centralized setting, several emerging applications
                 require (possibly, continuous) answers to queries on
                 dynamic data that is widely distributed and constantly
                 updated. Furthermore, such query answers often need to
                 discount data that is ``stale'', and operate solely on
                 a sliding window of recent data arrivals (e.g., data
                 updates occurring over the last 24 hours). Such
                 distributed data streaming applications mandate novel
                 algorithmic solutions that are both time- and
                 space-efficient (to manage high-speed data streams),
                 and also communication-efficient (to deal with physical
                 data distribution). In this paper, we consider the
                 problem of complex query answering over distributed,
                 high-dimensional data streams in the sliding-window
                 model. We introduce a novel sketching technique (termed
                 ECM-sketch) that allows effective summarization of
                 streaming data over both time-based and count-based
                 sliding windows with probabilistic accuracy guarantees.
                 Our sketch structure enables point as well as
                 inner-product queries, and can be employed to address a
                 broad range of problems, such as maintaining frequency
                 statistics, finding heavy hitters, and computing
                 quantiles in the sliding-window model. Focusing on
                 distributed environments, we demonstrate how
                 ECM-sketches of individual, local streams can be
                 composed to generate a (low-error) ECM-sketch summary
                 of the order-preserving aggregation of all streams;
                 furthermore, we show how ECM-sketches can be exploited
                 for continuous monitoring of sliding-window queries
                 over distributed streams. Our extensive experimental
                 study with two real-life data sets validates our
                 theoretical claims and verifies the effectiveness of
                 our techniques. To the best of our knowledge, ours is
                 the first work to address efficient, guaranteed-error
                 complex query answering over distributed data streams
                 in the sliding-window model.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vo:2012:LSL,
  author =       "Hoang Tam Vo and Sheng Wang and Divyakant Agrawal and
                 Gang Chen and Beng Chin Ooi",
  title =        "{LogBase}: a scalable log-structured database system
                 in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "1004--1015",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Numerous applications such as financial transactions
                 (e.g., stock trading) are write-heavy in nature. The
                 shift from reads to writes in web applications has also
                 been accelerating in recent years. Write-ahead-logging
                 is a common approach for providing recovery capability
                 while improving performance in most storage systems.
                 However, the separation of log and application data
                 incurs write overheads observed in write-heavy
                 environments and hence adversely affects the write
                 throughput and recovery time in the system. In this
                 paper, we introduce LogBase --- a scalable
                 log-structured database system that adopts log-only
                 storage for removing the write bottleneck and
                 supporting fast system recovery. It is designed to be
                 dynamically deployed on commodity clusters to take
                 advantage of elastic scaling property of cloud
                 environments. LogBase provides in-memory multiversion
                 indexes for supporting efficient access to data
                 maintained in the log. LogBase also supports
                 transactions that bundle read and write operations
                 spanning across multiple records. We implemented the
                 proposed system and compared it with HBase and a
                 disk-based log-structured record-oriented system
                 modeled after RAMCloud. The experimental results show
                 that LogBase is able to provide sustained write
                 throughput, efficient data access out of the cache, and
                 effective system recovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2012:EPN,
  author =       "Wei Lu and Yanyan Shen and Su Chen and Beng Chin Ooi",
  title =        "Efficient processing of $k$ nearest neighbor joins
                 using {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "1016--1027",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "$k$ nearest neighbor join ($k$ NN join), designed to
                 find $k$ nearest neighbors from a dataset S for every
                 object in another dataset R, is a primitive operation
                 widely adopted by many data mining applications. As a
                 combination of the $k$ nearest neighbor query and the
                 join operation, $k$ NN join is an expensive operation.
                 Given the increasing volume of data, it is difficult to
                 perform a $k$ NN join on a centralized machine
                 efficiently. In this paper, we investigate how to
                 perform $k$ NN join using MapReduce which is a
                 well-accepted framework for data-intensive applications
                 over clusters of computers. In brief, the mappers
                 cluster objects into groups; the reducers perform the
                 $k$ NN join on each group of objects separately. We
                 design an effective mapping mechanism that exploits
                 pruning rules for distance filtering, and hence reduces
                 both the shuffling and computational costs. To reduce
                 the shuffling cost, we propose two approximate
                 algorithms to minimize the number of replicas.
                 Extensive experiments on our in-house cluster
                 demonstrate that our proposed methods are efficient,
                 robust and scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Laptev:2012:EAR,
  author =       "Nikolay Laptev and Kai Zeng and Carlo Zaniolo",
  title =        "Early accurate results for advanced analytics on
                 {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "1028--1039",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Approximate results based on samples often provide the
                 only way in which advanced analytical applications on
                 very massive data sets can satisfy their time and
                 resource constraints. Unfortunately, methods and tools
                 for the computation of accurate early results are
                 currently not supported in MapReduce-oriented systems
                 although these are intended for `big data'. Therefore,
                 we proposed and implemented a non-parametric extension
                 of Hadoop which allows the incremental computation of
                 early results for arbitrary work-flows, along with
                 reliable on-line estimates of the degree of accuracy
                 achieved so far in the computation. These estimates are
                 based on a technique called bootstrapping that has been
                 widely employed in statistics and can be applied to
                 arbitrary functions and data distributions. In this
                 paper, we describe our Early Accurate Result Library
                 (EARL) for Hadoop that was designed to minimize the
                 changes required to the MapReduce framework. Various
                 tests of EARL of Hadoop are presented to characterize
                 the frequent situations where EARL can provide major
                 speed-ups over the current version of Hadoop.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2012:CCD,
  author =       "Xuan Liu and Meiyu Lu and Beng Chin Ooi and Yanyan
                 Shen and Sai Wu and Meihui Zhang",
  title =        "{CDAS}: a crowdsourcing data analytics system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "1040--1051",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Some complex problems, such as image tagging and
                 natural language processing, are very challenging for
                 computers, where even state-of-the-art technology is
                 yet able to provide satisfactory accuracy. Therefore,
                 rather than relying solely on developing new and better
                 algorithms to handle such tasks, we look to the
                 crowdsourcing solution --- employing human
                 participation --- to make good the shortfall in current
                 technology. Crowdsourcing is a good supplement to many
                 computer tasks. A complex job may be divided into
                 computer-oriented tasks and human-oriented tasks, which
                 are then assigned to machines and humans respectively.
                 To leverage the power of crowdsourcing, we design and
                 implement a Crowdsourcing Data Analytics System, CDAS.
                 CDAS is a framework designed to support the deployment
                 of various crowdsourcing applications. The core part of
                 CDAS is a quality-sensitive answering model, which
                 guides the crowdsourcing engine to process and monitor
                 the human tasks. In this paper, we introduce the
                 principles of our quality-sensitive model. To satisfy
                 user required accuracy, the model guides the
                 crowdsourcing query engine for the design and
                 processing of the corresponding crowdsourcing jobs. It
                 provides an estimated accuracy for each generated
                 result based on the human workers' historical
                 performances. When verifying the quality of the result,
                 the model employs an online strategy to reduce waiting
                 time. To show the effectiveness of the model, we
                 implement and deploy two analytics jobs on CDAS, a
                 twitter sentiment analytics job and an image tagging
                 job. We use real Twitter and Flickr data as our queries
                 respectively. We compare our approaches with
                 state-of-the-art classification and image annotation
                 techniques. The results show that the human-assisted
                 methods can indeed achieve a much higher accuracy. By
                 embedding the quality-sensitive model into
                 crowdsourcing query engine, we effectively reduce the
                 processing cost while maintaining the required query
                 answer quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sachan:2012:MSS,
  author =       "Mayank Sachan and Arnab Bhattacharya",
  title =        "Mining statistically significant substrings using the
                 chi-square statistic",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "1052--1063",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of identification of statistically
                 significant patterns in a sequence of data has been
                 applied to many domains such as intrusion detection
                 systems, financial models, web-click records, automated
                 monitoring systems, computational biology, cryptology,
                 and text analysis. An observed pattern of events is
                 deemed to be statistically significant if it is
                 unlikely to have occurred due to randomness or chance
                 alone. We use the chi-square statistic as a
                 quantitative measure of statistical significance. Given
                 a string of characters generated from a memoryless
                 Bernoulli model, the problem is to identify the
                 substring for which the empirical distribution of
                 single letters deviates the most from the distribution
                 expected from the generative Bernoulli model. This
                 deviation is captured using the chi-square measure. The
                 most significant substring (MSS) of a string is thus
                 defined as the substring having the highest chi-square
                 value. Till date, to the best of our knowledge, there
                 does not exist any algorithm to find the MSS in better
                 than $ O(n^2) $ time, where $n$ denotes the length of
                 the string. In this paper, we propose an algorithm to
                 find the most significant substring, whose running time
                 is $ O(n^{3 / 2})$ with high probability. We also study
                 some variants of this problem such as finding the
                 top-$t$ set, finding all substrings having chi-square
                 greater than a fixed threshold and finding the MSS
                 among substrings greater than a given length. We
                 experimentally demonstrate the asymptotic behavior of
                 the MSS on varying the string size and alphabet size.
                 We also describe some applications of our algorithm on
                 cryptology and real world data from finance and sports.
                 Finally, we compare our technique with the existing
                 heuristics for finding the MSS.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Albutiu:2012:MPS,
  author =       "Martina-Cezara Albutiu and Alfons Kemper and Thomas
                 Neumann",
  title =        "Massively parallel sort-merge joins in main memory
                 multi-core database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "1064--1075",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Two emerging hardware trends will dominate the
                 database system technology in the near future:
                 increasing main memory capacities of several TB per
                 server and massively parallel multi-core processing.
                 Many algorithmic and control techniques in current
                 database technology were devised for disk-based systems
                 where I/O dominated the performance. In this work we
                 take a new look at the well-known sort-merge join
                 which, so far, has not been in the focus of research in
                 scalable massively parallel multi-core data processing
                 as it was deemed inferior to hash joins. We devise a
                 suite of new massively parallel sort-merge (MPSM) join
                 algorithms that are based on partial partition-based
                 sorting. Contrary to classical sort-merge joins, our
                 MPSM algorithms do not rely on a hard to parallelize
                 final merge step to create one complete sort order.
                 Rather they work on the independently created runs in
                 parallel. This way our MPSM algorithms are NUMA-affine
                 as all the sorting is carried out on local memory
                 partitions. An extensive experimental evaluation on a
                 modern 32-core machine with one TB of main memory
                 proves the competitive performance of MPSM on large
                 main memory databases with billions of objects. It
                 scales (almost) linearly in the number of employed
                 cores and clearly outperforms competing hash join
                 proposals --- in particular it outperforms the
                 ``cutting-edge'' Vectorwise parallel query engine by a
                 factor of four.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Luo:2012:HDH,
  author =       "Tian Luo and Rubao Lee and Michael Mesnier and Feng
                 Chen and Xiaodong Zhang",
  title =        "{hStorage-DB}: heterogeneity-aware data management to
                 exploit the full capability of hybrid storage systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "10",
  pages =        "1076--1087",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:13 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As storage systems become increasingly heterogeneous
                 and complex, it adds burdens on DBAs, causing
                 suboptimal performance even after a lot of human
                 efforts have been made. In addition, existing
                 monitoring-based storage management by access pattern
                 detections has difficulties to handle workloads that
                 are highly dynamic and concurrent. To achieve high
                 performance by best utilizing heterogeneous storage
                 devices, we have designed and implemented a
                 heterogeneity-aware software framework for DBMS storage
                 management called hStorage-DB, where semantic
                 information that is critical for storage I/O is
                 identified and passed to the storage manager. According
                 to the collected semantic information, requests are
                 classified into different types. Each type is assigned
                 a proper QoS policy supported by the underlying storage
                 system, so that every request will be served with a
                 suitable storage device. With hStorage-DB, we can well
                 utilize semantic information that cannot be detected
                 through data access monitoring but is particularly
                 important for a hybrid storage system. To show the
                 effectiveness of hStorage-DB, we have implemented a
                 system prototype that consists of an I/O request
                 classification enabled DBMS, and a hybrid storage
                 system that is organized into a two-level caching
                 hierarchy. Our performance evaluation shows that
                 hStorage-DB can automatically make proper decisions for
                 data allocation in different storage devices and make
                 substantial performance improvements in a
                 cost-efficient way.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Choi:2012:SAM,
  author =       "Dong-Wan Choi and Chin-Wan Chung and Yufei Tao",
  title =        "A scalable algorithm for maximizing range sum in
                 spatial databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1088--1099",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper investigates the MaxRS problem in spatial
                 databases. Given a set O of weighted points and a
                 rectangular region r of a given size, the goal of the
                 MaxRS problem is to find a location of r such that the
                 sum of the weights of all the points covered by r is
                 maximized. This problem is useful in many
                 location-based applications such as finding the best
                 place for a new franchise store with a limited delivery
                 range and finding the most attractive place for a
                 tourist with a limited reachable range. However, the
                 problem has been studied mainly in theory,
                 particularly, in computational geometry. The existing
                 algorithms from the computational geometry community
                 are in-memory algorithms which do not guarantee the
                 scalability. In this paper, we propose a scalable
                 external-memory algorithm (ExactMaxRS) for the MaxRS
                 problem, which is optimal in terms of the I/O
                 complexity. Furthermore, we propose an approximation
                 algorithm (ApproxMaxCRS) for the MaxCRS problem that is
                 a circle version of the MaxRS problem. We prove the
                 correctness and optimality of the ExactMaxRS algorithm
                 along with the approximation bound of the ApproxMaxCRS
                 algorithm. From extensive experimental results, we show
                 that the ExactMaxRS algorithm is two orders of
                 magnitude faster than methods adapted from existing
                 algorithms, and the approximation bound in practice is
                 much better than the theoretical bound of the
                 ApproxMaxCRS algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aly:2012:SQT,
  author =       "Ahmed M. Aly and Walid G. Aref and Mourad Ouzzani",
  title =        "Spatial queries with two {kNN} predicates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1100--1111",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The widespread use of location-aware devices has led
                 to countless location-based services in which a user
                 query can be arbitrarily complex, i.e., one that embeds
                 multiple spatial selection and join predicates. Amongst
                 these predicates, the $k$-Nearest-Neighbor ($k$NN)
                 predicate stands as one of the most important and
                 widely used predicates. Unlike related research, this
                 paper goes beyond the optimization of queries with
                 single $k$NN predicates, and shows how queries with
                 two $k$NN predicates can be optimized. In particular,
                 the paper addresses the optimization of queries with:
                 (i) two $k$NN-select predicates, (ii) two $k$NN-join
                 predicates, and (iii) one $k$NN-join predicate and one
                 $k$NN-select predicate. For each type of queries,
                 conceptually correct query evaluation plans (QEPs) and
                 new algorithms that optimize the query execution time
                 are presented. Experimental results demonstrate that
                 the proposed algorithms outperform the conceptually
                 correct QEPs by orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sheng:2012:OAC,
  author =       "Cheng Sheng and Nan Zhang and Yufei Tao and Xin Jin",
  title =        "Optimal algorithms for crawling a hidden database in
                 the web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1112--1123",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A hidden database refers to a dataset that an
                 organization makes accessible on the web by allowing
                 users to issue queries through a search interface. In
                 other words, data acquisition from such a source is not
                 by following static hyper-links. Instead, data are
                 obtained by querying the interface, and reading the
                 result page dynamically generated. This, with other
                 facts such as the interface may answer a query only
                 partially, has prevented hidden databases from being
                 crawled effectively by existing search engines. This
                 paper remedies the problem by giving algorithms to
                 extract all the tuples from a hidden database. Our
                 algorithms are provably efficient, namely, they
                 accomplish the task by performing only a small number
                 of queries, even in the worst case. We also establish
                 theoretical results indicating that these algorithms
                 are asymptotically optimal --- i.e., it is impossible
                 to improve their efficiency by more than a constant
                 factor. The derivation of our upper and lower bound
                 results reveals significant insight into the
                 characteristics of the underlying problem. Extensive
                 experiments confirm the proposed techniques work very
                 well on all the real datasets examined.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qin:2012:DTR,
  author =       "Lu Qin and Jeffrey Xu Yu and Lijun Chang",
  title =        "Diversifying top-$k$ results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1124--1135",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Top-$k$ query processing finds a list of $k$ results
                 that have the largest scores w.r.t. the user-given query,
                 with the assumption that all the $k$ results are
                 independent to each other. In practice, some of the
                 top-$k$ results returned can be very similar to each
                 other. As a result some of the top-$k$ results returned
                 are redundant. In the literature, diversified top-$k$
                 search has been studied to return $k$ results that take
                 both score and diversity into consideration. Most
                 existing solutions on diversified top-$k$ search assume
                 that scores of all the search results are given, and
                 some works solve the diversity problem on a specific
                 problem and can hardly be extended to general cases. In
                 this paper, we study the diversified top-$k$ search
                 problem. We define a general diversified top-$k$ search
                 problem that only considers the similarity of the
                 search results themselves. We propose a framework, such
                 that most existing solutions for top-$k$ query
                 processing can be extended easily to handle diversified
                 top-$k$ search, by simply applying three new functions,
                 a sufficient stop condition sufficient(), a necessary
                 stop condition necessary(), and an algorithm for
                 diversified top-$k$ search on the current set of
                 generated results, div-search-current(). We propose
                 three new algorithms, namely, div-astar, div-dp, and
                 div-cut to solve the div-search-current() problem.
                 div-astar is an A* based algorithm, div-dp is an
                 algorithm that decomposes the results into components
                 which are searched using div-astar independently and
                 combined using dynamic programming. div-cut further
                 decomposes the current set of generated results using
                 cut points and combines the results using sophisticated
                 operations. We conducted extensive performance studies
                 using two real datasets, enwiki and reuters. Our
                 div-cut algorithm finds the optimal solution for
                 diversified top-$k$ search problem in seconds even for
                 $k$ as large as 2,000.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2012:KAO,
  author =       "Xin Cao and Lisi Chen and Gao Cong and Xiaokui Xiao",
  title =        "Keyword-aware optimal route search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1136--1147",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Identifying a preferable route is an important problem
                 that finds applications in map services. When a user
                 plans a trip within a city, the user may want to find
                 ``a most popular route such that it passes by shopping
                 mall, restaurant, and pub, and the travel time to and
                 from his hotel is within 4 hours.'' However, none of
                 the algorithms in the existing work on route planning
                 can be used to answer such queries. Motivated by this,
                 we define the problem of keyword-aware optimal route
                 query, denoted by KOR, which is to find an optimal
                 route such that it covers a set of user-specified
                 keywords, a specified budget constraint is satisfied,
                 and an objective score of the route is optimal. The
                 problem of answering KOR queries is NP-hard. We devise
                 an approximation algorithm OSScaling with provable
                 approximation bounds. Based on this algorithm, another
                 more efficient approximation algorithm BucketBound is
                 proposed. We also design a greedy approximation
                 algorithm. Results of empirical studies show that all
                 the proposed algorithms are capable of answering KOR
                 queries efficiently, while the BucketBound and Greedy
                 algorithms run faster. The empirical studies also offer
                 insight into the accuracy of the proposed algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cautis:2012:AQU,
  author =       "Bogdan Cautis and Evgeny Kharlamov",
  title =        "Answering queries using views over probabilistic
                 {XML}: complexity and tractability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1148--1159",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the complexity of query answering using views
                 in a probabilistic XML setting, identifying large
                 classes of XPath queries --- with child and descendant
                 navigation and predicates --- for which there are
                 efficient (PTime) algorithms. We consider this problem
                 under the two possible semantics for XML query results:
                 with persistent node identifiers and in their absence.
                 Accordingly, we consider rewritings that can exploit a
                 single view, by means of compensation, and rewritings
                 that can use multiple views, by means of intersection.
                 Since in a probabilistic setting queries return answers
                 with probabilities, the problem of rewriting goes
                 beyond the classic one of retrieving XML answers from
                 views. For both semantics of XML queries, we show that,
                 even when XML answers can be retrieved from views,
                 their probabilities may not be computable. For
                 rewritings that use only compensation, we describe a
                 PTime decision procedure, based on easily verifiable
                 criteria that distinguish between the feasible cases
                 --- when probabilistic XML results are computable ---
                 and the unfeasible ones. For rewritings that can use
                 multiple views, with compensation and intersection, we
                 identify the most permissive conditions that make
                 probabilistic rewriting feasible, and we describe an
                 algorithm that is sound in general, and becomes
                 complete under fairly permissive restrictions, running
                 in PTime modulo worst-case exponential time equivalence
                 tests. This is the best we can hope for since
                 intersection makes query equivalence intractable
                 already over deterministic data. Our algorithm runs in
                 PTime whenever deterministic rewritings can be found in
                 PTime.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jha:2012:PDM,
  author =       "Abhay Jha and Dan Suciu",
  title =        "Probabilistic databases with {MarkoViews}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1160--1171",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Most of the work on query evaluation in probabilistic
                 databases has focused on the simple tuple-independent
                 data model, where tuples are independent random events.
                 Several efficient query evaluation techniques exists in
                 this setting, such as safe plans, algorithms based on
                 OBDDs, tree-decomposition and a variety of
                 approximation algorithms. However, complex data
                 analytics tasks often require complex correlations, and
                 query evaluation then is significantly more expensive,
                 or more restrictive. In this paper, we propose MVDB as
                 a framework both for representing complex correlations
                 and for efficient query evaluation. An MVDB specifies
                 correlations by views, called MarkoViews, on the
                 probabilistic relations and declaring the weights of
                 the view's outputs. An MVDB is a (very large) Markov
                 Logic Network. We make two sets of contributions.
                 First, we show that query evaluation on an MVDB is
                 equivalent to evaluating a Union of Conjunctive
                 Query (UCQ) over a tuple-independent database. The
                 translation is exact (thus allowing the techniques
                 developed for tuple independent databases to be carried
                 over to MVDB), yet it is novel and quite non-obvious
                 (some resulting probabilities may be negative!). This
                 translation in itself though may not lead to much gain
                 since the translated query gets complicated as we try
                 to capture more correlations. Our second contribution
                 is to propose a new query evaluation strategy that
                 exploits offline compilation to speed up online query
                 evaluation. Here we utilize and extend our prior work
                 on compilation of UCQ. We validate experimentally our
                 techniques on a large probabilistic database with
                 MarkoViews inferred from the DBLP data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mamouras:2012:CSC,
  author =       "Konstantinos Mamouras and Sigal Oren and Lior Seeman
                 and Lucja Kot and Johannes Gehrke",
  title =        "The complexity of social coordination",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1172--1183",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Coordination is a challenging everyday task; just
                 think of the last time you organized a party or a
                 meeting involving several people. As a growing part of
                 our social and professional life goes online, an
                 opportunity for an improved coordination process
                 arises. Recently, Gupta et al. proposed entangled
                 queries as a declarative abstraction for data-driven
                 coordination, where the difficulty of the coordination
                 task is shifted from the user to the database.
                 Unfortunately, evaluating entangled queries is very
                 hard, and thus previous work considered only a
                 restricted class of queries that satisfy safety (the
                 coordination partners are fixed) and uniqueness (all
                 queries need to be satisfied). In this paper we
                 significantly extend the class of feasible entangled
                 queries beyond uniqueness and safety. First, we show
                 that we can simply drop uniqueness and still
                 efficiently evaluate a set of safe entangled queries.
                 Second, we show that as long as all users coordinate on
                 the same set of attributes, we can give an efficient
                 algorithm for coordination even if the set of queries
                 does not satisfy safety. In an experimental evaluation
                 we show that our algorithms are feasible for a wide
                 spectrum of coordination scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2012:EMW,
  author =       "Xiaofei Zhang and Lei Chen and Min Wang",
  title =        "Efficient multi-way theta-join processing using
                 {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1184--1195",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multi-way Theta-join queries are powerful in
                 describing complex relations and therefore widely
                 employed in real practices. However, existing solutions
                 from traditional distributed and parallel databases for
                 multi-way Theta-join queries cannot be easily extended
                 to fit a shared-nothing distributed computing paradigm,
                 which is proven to be able to support OLAP applications
                 over immense data volumes. In this work, we study the
                 problem of efficient processing of multi-way Theta-join
                 queries using MapReduce from a cost-effective
                 perspective. Although there have been some works using
                 the (key, value) pair-based programming model to
                 support join operations, efficient processing of
                 multi-way Theta-join queries has never been fully
                 explored. The substantial challenge lies in, given a
                 number of processing units (that can run Map or Reduce
                 tasks), mapping a multi-way Theta-join query to a
                 number of MapReduce jobs and having them executed in a
                 well scheduled sequence, such that the total processing
                 time span is minimized. Our solution mainly includes
                 two parts: (1) cost metrics for both single MapReduce
                 job and a number of MapReduce jobs executed in a
                 certain order; (2) the efficient execution of a
                 chain-typed Theta-join with only one MapReduce job.
                 Comparing with the query evaluation strategy proposed
                 in [23] and the widely adopted Pig Latin and Hive SQL
                 solutions, our method achieves significant improvement
                 of the join processing efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lim:2012:STB,
  author =       "Harold Lim and Herodotos Herodotou and Shivnath Babu",
  title =        "{Stubby}: a transformation-based optimizer for
                 {MapReduce} workflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1196--1207",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is a growing trend of performing analysis on
                 large datasets using workflows composed of MapReduce
                 jobs connected through producer-consumer relationships
                 based on data. This trend has spurred the development
                 of a number of interfaces---ranging from program-based
                 to query-based interfaces---for generating MapReduce
                 workflows. Studies have shown that the gap in
                 performance can be quite large between optimized and
                 unoptimized workflows. However, automatic cost-based
                 optimization of MapReduce workflows remains a challenge
                 due to the multitude of interfaces, large size of the
                 execution plan space, and the frequent unavailability
                 of all types of information needed for optimization. We
                 introduce a comprehensive plan space for MapReduce
                 workflows generated by popular workflow generators. We
                 then propose Stubby, a cost-based optimizer that
                 searches selectively through the subspace of the full
                 plan space that can be enumerated correctly and costed
                 based on the information available in any given
                 setting. Stubby enumerates the plan space based on
                 plan-to-plan transformations and an efficient search
                 algorithm. Stubby is designed to be extensible to new
                 interfaces and new types of optimizations, which is a
                 desirable feature given how rapidly MapReduce systems
                 are evolving. Stubby's efficiency and effectiveness
                 have been evaluated using representative workflows from
                 many domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1208--1219: view-adaptive dynamic labeling
%%% for reachability queries over workflow-provenance views.
@Article{Bao:2012:LWV,
  author =       "Zhuowei Bao and Susan B. Davidson and Tova Milo",
  title =        "Labeling workflow views with fine-grained
                 dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1208--1219",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper considers the problem of efficiently
                 answering reachability queries over views of provenance
                 graphs, derived from executions of workflows that may
                 include recursion. Such views include composite modules
                 and model fine-grained dependencies between module
                 inputs and outputs. A novel view-adaptive dynamic
                 labeling scheme is developed for efficient query
                 evaluation, in which view specifications are labeled
                 statically (i.e. as they are created) and data items
                 are labeled dynamically as they are produced during a
                 workflow execution. Although the combination of
                 fine-grained dependencies and recursive workflows
                 entail, in general, long (linear-size) data labels, we
                 show that for a large natural class of workflows and
                 views, labels are compact (logarithmic-size) and
                 reachability queries can be evaluated in constant time.
                 Experimental results demonstrate the benefit of this
                 approach over the state-of-the-art technique when
                 applied for labeling multiple views.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1220--1231: axiomatization of order
%%% dependencies; shown sound and complete, subsuming functional dependencies.
@Article{Szlichta:2012:FOD,
  author =       "Jaros{\l}aw Szlichta and Parke Godfrey and Jarek
                 Gryz",
  title =        "Fundamentals of order dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1220--1231",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Dependencies have played a significant role in
                 database design for many years. They have also been
                 shown to be useful in query optimization. In this
                 paper, we discuss dependencies between
                 lexicographically ordered sets of tuples. We introduce
                 formally the concept of order dependency and present a
                 set of axioms (inference rules) for them. We show how
                 query rewrites based on these axioms can be used for
                 query optimization. We present several interesting
                 theorems that can be derived using the inference rules.
                 We prove that functional dependencies are subsumed by
                 order dependencies and that our set of axioms for order
                 dependencies is sound and complete.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1232--1243: FDB, an in-memory
%%% select-project-join engine over factorised relational representations.
@Article{Bakibayev:2012:FQE,
  author =       "Nurzhan Bakibayev and Dan Olteanu and Jakub
                 Z{\'a}vodn{\'y}",
  title =        "{FDB}: a query engine for factorised relational
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1232--1243",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Factorised databases are relational databases that use
                 compact factorised representations at the physical
                 layer to reduce data redundancy and boost query
                 performance. This paper introduces FDB, an in-memory
                 query engine for select-project-join queries on
                 factorised databases. Key components of FDB are novel
                 algorithms for query optimisation and evaluation that
                 exploit the succinctness brought by data factorisation.
                 Experiments show that for data sets with many-to-many
                 relationships FDB can outperform relational engines by
                 orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1244--1255: optimizing evaluation of SQL
%%% analytic window functions; implemented in PostgreSQL.
%%% NOTE(review): "study ... demonstrate" in the final abstract sentence is
%%% a number-agreement slip, but it may be verbatim from the published
%%% abstract -- left unchanged pending a check against the original paper.
@Article{Cao:2012:OAW,
  author =       "Yu Cao and Chee-Yong Chan and Jie Li and Kian-Lee
                 Tan",
  title =        "Optimization of analytic window functions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1244--1255",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analytic functions represent the state-of-the-art way
                 of performing complex data analysis within a single SQL
                 statement. In particular, an important class of
                 analytic functions that has been frequently used in
                 commercial systems to support OLAP and decision support
                 applications is the class of window functions. A window
                 function returns for each input tuple a value derived
                 from applying a function over a window of neighboring
                 tuples. However, existing window function evaluation
                 approaches are based on a naive sorting scheme. In this
                 paper, we study the problem of optimizing the
                 evaluation of window functions. We propose several
                 efficient techniques, and identify optimization
                 opportunities that allow us to optimize the evaluation
                 of a set of window functions. We have integrated our
                 scheme into PostgreSQL. Our comprehensive experimental
                 study on the TPC-DS datasets as well as synthetic
                 datasets and queries demonstrate significant speedup
                 over existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1256--1267: reordering black-box UDF
%%% operators in parallel data flows via static code analysis.
@Article{Hueske:2012:OBB,
  author =       "Fabian Hueske and Mathias Peters and Matthias J. Sax
                 and Astrid Rheinl{\"a}nder and Rico Bergmann and
                 Aljoscha Krettek and Kostas Tzoumas",
  title =        "Opening the black boxes in data flow optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1256--1267",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many systems for big data analytics employ a data flow
                 abstraction to define parallel data processing tasks.
                 In this setting, custom operations expressed as
                 user-defined functions are very common. We address the
                 problem of performing data flow optimization at this
                 level of abstraction, where the semantics of operators
                 are not known. Traditionally, query optimization is
                 applied to queries with known algebraic semantics. In
                 this work, we find that a handful of properties, rather
                 than a full algebraic specification, suffice to
                 establish reordering conditions for data processing
                 operators. We show that these properties can be
                 accurately estimated for black box operators by
                 statically analyzing the general-purpose code of their
                 user-defined functions. We design and implement an
                 optimizer for parallel data flows that does not assume
                 knowledge of semantics or algebraic properties of
                 operators. Our evaluation confirms that the optimizer
                 can apply common rewritings such as selection
                 reordering, bushy join-order enumeration, and limited
                 forms of aggregation push-down, hence yielding similar
                 rewriting power as modern relational DBMS optimizers.
                 Moreover, it can optimize the operator order of
                 nonrelational data flows, a unique feature among
                 today's systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1268--1279: incremental (workset)
%%% iterations integrated into parallel dataflow systems.
@Article{Ewen:2012:SFI,
  author =       "Stephan Ewen and Kostas Tzoumas and Moritz Kaufmann
                 and Volker Markl",
  title =        "Spinning fast iterative data flows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1268--1279",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Parallel dataflow systems are a central part of most
                 analytic pipelines for big data. The iterative nature
                 of many analysis and machine learning algorithms,
                 however, is still a challenge for current systems.
                 While certain types of bulk iterative algorithms are
                 supported by novel dataflow frameworks, these systems
                 cannot exploit computational dependencies present in
                 many algorithms, such as graph algorithms. As a result,
                 these algorithms are inefficiently executed and have
                 led to specialized systems based on other paradigms,
                 such as message passing or shared memory. We propose a
                 method to integrate incremental iterations, a form of
                 workset iterations, with parallel dataflows. After
                 showing how to integrate bulk iterations into a
                 dataflow system and its optimizer, we present an
                 extension to the programming model for incremental
                 iterations. The extension alleviates for the lack of
                 mutable state in dataflows and allows for exploiting
                 the sparse computational dependencies inherent in many
                 iterative algorithms. The evaluation of a prototypical
                 implementation shows that those aspects lead to up to
                 two orders of magnitude speedup in algorithm runtime,
                 when exploited. In our experiments, the improved
                 dataflow system is highly competitive with specialized
                 systems while maintaining a transparent and unified
                 dataflow abstraction.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1280--1291: REX, a runtime for recursive
%%% delta-propagating data-centric computation.
@Article{Mihaylov:2012:RRD,
  author =       "Svilen R. Mihaylov and Zachary G. Ives and Sudipto
                 Guha",
  title =        "{REX}: recursive, delta-based data-centric
                 computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1280--1291",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In today's Web and social network environments, query
                 workloads include ad hoc and OLAP queries, as well as
                 iterative algorithms that analyze data relationships
                 (e.g., link analysis, clustering, learning). Modern
                 DBMSs support ad hoc and OLAP queries, but most are not
                 robust enough to scale to large clusters. Conversely,
                 ``cloud'' platforms like MapReduce execute chains of
                 batch tasks across clusters in a fault tolerant way,
                 but have too much overhead to support ad hoc queries.
                 Moreover, both classes of platform incur significant
                 overhead in executing iterative data analysis
                 algorithms. Most such iterative algorithms repeatedly
                 refine portions of their answers, until some
                 convergence criterion is reached. However, general
                 cloud platforms typically must reprocess all data in
                 each step. DBMSs that support recursive SQL are more
                 efficient in that they propagate only the changes in
                 each step --- but they still accumulate each
                 iteration's state, even if it is no longer useful.
                 User-defined functions are also typically harder to
                 write for DBMSs than for cloud platforms. We seek to
                 unify the strengths of both styles of platforms, with a
                 focus on supporting iterative computations in which
                 changes, in the form of deltas, are propagated from
                 iteration to iteration, and state is efficiently
                 updated in an extensible way. We present a programming
                 model oriented around deltas, describe how we execute
                 and optimize such programs in our REX runtime system,
                 and validate that our platform also handles failures
                 gracefully. We experimentally validate our techniques,
                 and show speedups over the competing methods ranging
                 from 2.5 to nearly 100 times.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1292--1303: indexing for $k$-hop
%%% reachability queries in directed graphs.
%%% Fix: "length k" set in math mode ($k$) for consistency with the
%%% rest of this abstract ($k$-hop, $ k = \infty $).
@Article{Cheng:2012:KRW,
  author =       "James Cheng and Zechao Shang and Hong Cheng and Haixun
                 Wang and Jeffrey Xu Yu",
  title =        "{K}-reach: who is in your small world",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1292--1303",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of answering $k$-hop reachability
                 queries in a directed graph, i.e., whether there exists
                 a directed path of length $k$, from a source query
                 vertex to a target query vertex in the input graph. The
                 problem of $k$-hop reachability is a general problem of
                 the classic reachability (where $ k = \infty $).
                 Existing indexes for processing classic reachability
                 queries, as well as for processing shortest path
                 queries, are not applicable or not efficient for
                 processing $k$-hop reachability queries. We propose an
                 index for processing $k$-hop reachability queries,
                 which is simple in design and efficient to construct.
                 Our experimental results on a wide range of real
                 datasets show that our index is more efficient than the
                 state-of-the-art indexes even for processing classic
                 reachability queries, for which these indexes are
                 primarily designed. We also show that our index is
                 efficient in answering $k$-hop reachability queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1304--1316: partial-evaluation-based
%%% distributed algorithms for (bounded, regular) reachability queries.
@Article{Fan:2012:PGD,
  author =       "Wenfei Fan and Xin Wang and Yinghui Wu",
  title =        "Performance guarantees for distributed reachability
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1304--1316",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the real world a graph is often fragmented and
                 distributed across different sites. This highlights the
                 need for evaluating queries on distributed graphs. This
                 paper proposes distributed evaluation algorithms for
                 three classes of queries: reachability for determining
                 whether one node can reach another, bounded
                 reachability for deciding whether there exists a path
                 of a bounded length between a pair of nodes, and
                 regular reachability for checking whether there exists
                 a path connecting two nodes such that the node labels
                 on the path form a string in a given regular
                 expression. We develop these algorithms based on
                 partial evaluation, to explore parallel computation.
                 When evaluating a query Q on a distributed graph G, we
                 show that these algorithms possess the following
                 performance guarantees, no matter how G is fragmented
                 and distributed: (1) each site is visited only once;
                 (2) the total network traffic is determined by the size
                 of Q and the fragmentation of G, independent of the
                 size of G; and (3) the response time is decided by the
                 largest fragment of G rather than the entire G. In
                 addition, we show that these algorithms can be readily
                 implemented in the MapReduce framework. Using synthetic
                 and real-life data, we experimentally verify that these
                 algorithms are scalable on large graphs, regardless of
                 how the graphs are distributed.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1316--1327: subtree indexing with
%%% root-split coding for querying syntactically parsed text corpora.
%%% Fix: transcription typo "Moreover, We show" -> "Moreover, we show".
%%% NOTE(review): the page range 1316--1327 overlaps the preceding
%%% entry's 1304--1316 at p. 1316 -- verify against the published issue.
@Article{Chubak:2012:EIQ,
  author =       "Pirooz Chubak and Davood Rafiei",
  title =        "Efficient indexing and querying over syntactically
                 annotated trees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1316--1327",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Natural language text corpora are often available as
                 sets of syntactically parsed trees. A wide range of
                 expressive tree queries are possible over such parsed
                 trees that open a new avenue in searching over natural
                 language text. They not only allow for querying roles
                 and relationships within sentences, but also improve
                 search effectiveness compared to flat keyword queries.
                 One major drawback of current systems supporting
                 querying over parsed text is the performance of
                 evaluating queries over large data. In this paper we
                 propose a novel indexing scheme over unique subtrees as
                 index keys. We also propose a novel root-split coding
                 scheme that stores subtree structural information only
                 partially, thus reducing index size and improving
                 querying performance. Our extensive set of experiments
                 show that root-split coding reduces the index size of
                 any interval coding which stores individual node
                 numbers by a factor of 50\% to 80\%, depending on the
                 sizes of subtrees indexed. Moreover, we show that our
                 index using root-split coding, outperforms previous
                 approaches by at least an order of magnitude in terms
                 of the response time of queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1328--1339: GN-SQL and GN-Datalog,
%%% query languages restricted to guarded negation.
@Article{Barany:2012:QGN,
  author =       "Vince B{\'a}r{\'a}ny and Balder ten Cate and Martin
                 Otto",
  title =        "Queries with guarded negation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1328--1339",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A well-established and fundamental insight in database
                 theory is that negation (also known as complementation)
                 tends to make queries difficult to process and
                 difficult to reason about. Many basic problems are
                 decidable and admit practical algorithms in the case of
                 unions of conjunctive queries, but become difficult or
                 even undecidable when queries are allowed to contain
                 negation. Inspired by recent results in finite model
                 theory, we consider a restricted form of negation,
                 guarded negation. We introduce a fragment of SQL,
                 called GN-SQL, as well as a fragment of Datalog with
                 stratified negation, called GN-Datalog, that allow only
                 guarded negation, and we show that these query
                 languages are computationally well behaved, in terms of
                 testing query containment, query evaluation, open-world
                 query answering, and boundedness. GN-SQL and GN-Datalog
                 subsume a number of well known query languages and
                 constraint languages, such as unions of conjunctive
                 queries, monadic Datalog, and frontier-guarded tgds. In
                 addition, an analysis of standard benchmark workloads
                 shows that many uses of negation in SQL in practice are
                 guarded.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1340--1351: PrivBasis, differentially
%%% private frequent itemset mining via basis sets.
@Article{Li:2012:PFI,
  author =       "Ninghui Li and Wahbeh Qardaji and Dong Su and Jianneng
                 Cao",
  title =        "{PrivBasis}: frequent itemset mining with differential
                 privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1340--1351",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The discovery of frequent itemsets can serve valuable
                 economic and research purposes. Releasing discovered
                 frequent itemsets, however, presents privacy
                 challenges. In this paper, we study the problem of how
                 to perform frequent itemset mining on transaction
                 databases while satisfying differential privacy. We
                 propose an approach, called PrivBasis, which leverages
                 a novel notion called basis sets. A $ \theta $-basis
                 set has the property that any itemset with frequency
                 higher than $ \theta $ is a subset of some basis. We
                 introduce algorithms for privately constructing a basis
                 set and then using it to find the most frequent
                 itemsets. Experiments show that our approach greatly
                 outperforms the current state of the art.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1352--1363: Low-Rank Mechanism for
%%% answering batch queries under differential privacy.
%%% The "[16]" in the abstract is a citation marker transcribed verbatim
%%% from the paper's own abstract.
@Article{Yuan:2012:LRM,
  author =       "Ganzhao Yuan and Zhenjie Zhang and Marianne Winslett
                 and Xiaokui Xiao and Yin Yang and Zhifeng Hao",
  title =        "Low-rank mechanism: optimizing batch queries under
                 differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1352--1363",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differential privacy is a promising privacy-preserving
                 paradigm for statistical query processing over
                 sensitive data. It works by injecting random noise into
                 each query result, such that it is provably hard for
                 the adversary to infer the presence or absence of any
                 individual record from the published noisy results. The
                 main objective in differentially private query
                 processing is to maximize the accuracy of the query
                 results, while satisfying the privacy guarantees.
                 Previous work, notably the matrix mechanism [16], has
                 suggested that processing a batch of correlated queries
                 as a whole can potentially achieve considerable
                 accuracy gains, compared to answering them
                 individually. However, as we point out in this paper,
                 the matrix mechanism is mainly of theoretical interest;
                 in particular, several inherent problems in its design
                 limit its accuracy in practice, which almost never
                 exceeds that of na{\"\i}ve methods. In fact, we are not
                 aware of any existing solution that can effectively
                 optimize a query batch under differential privacy.
                 Motivated by this, we propose the Low-Rank Mechanism
                 (LRM), the first practical differentially private
                 technique for answering batch queries with high
                 accuracy, based on a low rank approximation of the
                 workload matrix. We prove that the accuracy provided by
                 LRM is close to the theoretical lower bound for any
                 mechanism to answer a batch of queries under
                 differential privacy. Extensive experiments using real
                 data demonstrate that LRM consistently outperforms
                 state-of-the-art query processing solutions under
                 differential privacy, by large margins.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 5(11), July 2012, pp. 1364--1375: Functional Mechanism,
%%% differentially private regression via objective-function perturbation.
@Article{Zhang:2012:FMR,
  author =       "Jun Zhang and Zhenjie Zhang and Xiaokui Xiao and Yin
                 Yang and Marianne Winslett",
  title =        "Functional mechanism: regression analysis under
                 differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1364--1375",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "$ \epsilon $-differential privacy is the
                 state-of-the-art model for releasing sensitive
                 information while protecting privacy. Numerous methods
                 have been proposed to enforce $ \epsilon $-differential
                 privacy in various analytical tasks, e.g., regression
                 analysis. Existing solutions for regression analysis,
                 however, are either limited to non-standard types of
                 regression or unable to produce accurate regression
                 results. Motivated by this, we propose the Functional
                 Mechanism, a differentially private method designed for
                 a large class of optimization-based analyses. The main
                 idea is to enforce $ \epsilon $-differential privacy by
                 perturbing the objective function of the optimization
                 problem, rather than its results. As case studies, we
                 apply the functional mechanism to address two most
                 widely used regression models, namely, linear
                 regression and logistic regression. Both theoretical
                 analysis and thorough experimental evaluations show
                 that the functional mechanism is highly effective and
                 efficient, and it significantly outperforms existing
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Boldi:2012:IUG,
  author =       "Paolo Boldi and Francesco Bonchi and Aristides Gionis
                 and Tamir Tassa",
  title =        "Injecting uncertainty in graphs for identity
                 obfuscation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1376--1387",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data collected nowadays by social-networking
                 applications create fascinating opportunities for
                 building novel services, as well as expanding our
                 understanding about social structures and their
                 dynamics. Unfortunately, publishing social-network
                 graphs is considered an ill-advised practice due to
                 privacy concerns. To alleviate this problem, several
                 anonymization methods have been proposed, aiming at
                 reducing the risk of a privacy breach on the published
                 data, while still allowing to analyze them and draw
                 relevant conclusions. In this paper we introduce a new
                 anonymization approach that is based on injecting
                 uncertainty in social graphs and publishing the
                 resulting uncertain graphs. While existing approaches
                 obfuscate graph data by adding or removing edges
                 entirely, we propose using a finer-grained perturbation
                 that adds or removes edges partially: this way we can
                 achieve the same desired level of obfuscation with
                 smaller changes in the data, thus maintaining higher
                 utility. Our experiments on real-world networks confirm
                 that at the same level of identity obfuscation our
                 method provides higher usefulness than existing
                 randomized methods that publish standard graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2012:PMR,
  author =       "Jianneng Cao and Panagiotis Karras",
  title =        "Publishing microdata with a robust privacy guarantee",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1388--1399",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today, the publication of microdata poses a privacy
                 threat. Vast research has striven to define the privacy
                 condition that microdata should satisfy before it is
                 released, and devise algorithms to anonymize the data
                 so as to achieve this condition. Yet, no method
                 proposed to date explicitly bounds the percentage of
                 information an adversary gains after seeing the
                 published data for each sensitive value therein. This
                 paper introduces $ \beta $-likeness, an appropriately
                 robust privacy model for microdata anonymization, along
                 with two anonymization schemes designed therefore, the
                 one based on generalization, and the other based on
                 perturbation. Our model postulates that an adversary's
                 confidence on the likelihood of a certain
                 sensitive-attribute (SA) value should not increase, in
                 relative difference terms, by more than a predefined
                 threshold. Our techniques aim to satisfy a given $
                 \beta $ threshold with little information loss. We
                 experimentally demonstrate that (i) our model provides
                 an effective privacy guarantee in a way that
                 predecessor models cannot, (ii) our generalization
                 scheme is more effective and efficient in its task than
                 methods adapting algorithms for the $k$-anonymity
                 model, and (iii) our perturbation method outperforms a
                 baseline approach. Moreover, we discuss in detail the
                 resistance of our model and methods to attacks proposed
                 in previous research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guan:2012:MTE,
  author =       "Ziyu Guan and Xifeng Yan and Lance M. Kaplan",
  title =        "Measuring two-event structural correlations on
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1400--1411",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Real-life graphs usually have various kinds of events
                 happening on them, e.g., product purchases in online
                 social networks and intrusion alerts in computer
                 networks. The occurrences of events on the same graph
                 could be correlated, exhibiting either attraction or
                 repulsion. Such structural correlations can reveal
                 important relationships between different events.
                 Unfortunately, correlation relationships on graph
                 structures are not well studied and cannot be captured
                 by traditional measures. In this work, we design a
                 novel measure for assessing two-event structural
                 correlations on graphs. Given the occurrences of two
                 events, we choose uniformly a sample of ``reference
                 nodes'' from the vicinity of all event nodes and employ
                 the Kendall's $ \tau $ rank correlation measure to
                 compute the average concordance of event density
                 changes. Significance can be efficiently assessed by $
                 \tau $'s nice property of being asymptotically normal
                 under the null hypothesis. In order to compute the
                 measure in large scale networks, we develop a scalable
                 framework using different sampling strategies. The
                 complexity of these strategies is analyzed. Experiments
                 on real graph datasets with both synthetic and real
                 events demonstrate that the proposed framework is not
                 only efficacious, but also efficient and scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jestes:2012:RLT,
  author =       "Jeffrey Jestes and Jeff M. Phillips and Feifei Li and
                 Mingwang Tang",
  title =        "Ranking large temporal data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1412--1423",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Ranking temporal data has not been studied until
                 recently, even though ranking is an important operator
                 (being promoted as a first-class citizen) in database
                 systems. However, only the instant top-$k$ queries on
                 temporal data were studied in, where objects with the
                 $k$ highest scores at a query time instance t are to be
                 retrieved. The instant top-$k$ definition clearly comes
                 with limitations (sensitive to outliers, difficult to
                 choose a meaningful query time $t$). A more flexible
                 and general ranking operation is to rank objects based
                 on the aggregation of their scores in a query interval,
                 which we dub the aggregate top-$k$ query on temporal
                 data. For example, return the top-10 weather stations
                 having the highest average temperature from 10/01/2010
                 to 10/07/2010; find the top-20 stocks having the
                 largest total transaction volumes from 02/05/2011 to
                 02/07/2011. This work presents a comprehensive study to
                 this problem by designing both exact and approximate
                 methods (with approximation quality guarantees). We
                 also provide theoretical analysis on the construction
                 cost, the index size, the update and the query costs of
                 each approach. Extensive experiments on large real
                 datasets clearly demonstrate the efficiency, the
                 effectiveness, and the scalability of our methods
                 compared to the baseline methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Funke:2012:CTD,
  author =       "Florian Funke and Alfons Kemper and Thomas Neumann",
  title =        "Compacting transactional data in hybrid {OLTP\&OLAP}
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1424--1435",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Growing main memory sizes have facilitated database
                 management systems that keep the entire database in
                 main memory. The drastic performance improvements that
                 came along with these in-memory systems have made it
                 possible to reunite the two areas of online transaction
                 processing (OLTP) and online analytical processing
                 (OLAP): An emerging class of hybrid OLTP and OLAP
                 database systems allows to process analytical queries
                 directly on the transactional data. By offering
                 arbitrarily current snapshots of the transactional data
                 for OLAP, these systems enable real-time business
                 intelligence. Despite memory sizes of several Terabytes
                 in a single commodity server, RAM is still a precious
                 resource: Since free memory can be used for
                 intermediate results in query processing, the amount of
                 memory determines query performance to a large extent.
                 Consequently, we propose the compaction of
                 memory-resident databases. Compaction consists of two
                 tasks: First, separating the mutable working set from
                 the immutable ``frozen'' data. Second, compressing the
                 immutable data and optimizing it for efficient,
                 memory-consumption-friendly snapshotting. Our approach
                 reorganizes and compresses transactional data online
                 and yet hardly affects the mission-critical OLTP
                 throughput. This is achieved by unburdening the OLTP
                 threads from all additional processing and performing
                 these tasks asynchronously.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hall:2012:PTC,
  author =       "Alexander Hall and Olaf Bachmann and Robert B{\"u}ssow
                 and Silviu Ganceanu and Marc Nunkesser",
  title =        "Processing a trillion cells per mouse click",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1436--1446",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Column-oriented database systems have been a real game
                 changer for the industry in recent years. Highly tuned
                 and performant systems have evolved that provide users
                 with the possibility of answering ad hoc queries over
                 large datasets in an interactive manner. In this paper
                 we present the column-oriented datastore developed as
                 one of the central components of PowerDrill. It
                 combines the advantages of columnar data layout with
                 other known techniques (such as using composite range
                 partitions) and extensive algorithmic engineering on
                 key data structures. The main goal of the latter being
                 to reduce the main memory footprint and to increase the
                 efficiency in processing typical user queries. In this
                 combination we achieve large speed-ups. These enable a
                 highly interactive Web UI where it is common that a
                 single mouse click leads to processing a trillion
                 values in the underlying dataset.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Porobic:2012:OHI,
  author =       "Danica Porobic and Ippokratis Pandis and Miguel Branco
                 and Pinar T{\"o}z{\"u}n and Anastasia Ailamaki",
  title =        "{OLTP} on hardware islands",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1447--1458",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern hardware is abundantly parallel and
                 increasingly heterogeneous. The numerous processing
                 cores have nonuniform access latencies to the main
                 memory and to the processor caches, which causes
                 variability in the communication costs. Unfortunately,
                 database systems mostly assume that all processing
                 cores are the same and that microarchitecture
                 differences are not significant enough to appear in
                 critical database execution paths. As we demonstrate in
                 this paper, however, hardware heterogeneity does appear
                 in the critical path and conventional database
                 architectures achieve suboptimal and even worse,
                 unpredictable performance. We perform a detailed
                 performance analysis of OLTP deployments in servers
                 with multiple cores per CPU (multicore) and multiple
                 CPUs per server (multisocket). We compare different
                 database deployment strategies where we vary the number
                 and size of independent database instances running on a
                 single server, from a single shared-everything instance
                 to fine-grained shared-nothing configurations. We
                 quantify the impact of non-uniform hardware on various
                 deployments by (a) examining how efficiently each
                 deployment uses the available hardware resources and
                 (b) measuring the impact of distributed transactions
                 and skewed requests on different workloads. Finally, we
                 argue in favor of shared-nothing deployments that are
                 topology- and workload-aware and take advantage of fast
                 on-chip communication between islands of cores on the
                 same socket.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Patterson:2012:SSC,
  author =       "Stacy Patterson and Aaron J. Elmore and Faisal Nawab
                 and Divyakant Agrawal and Amr {El Abbadi}",
  title =        "Serializability, not serial: concurrency control and
                 availability in multi-datacenter datastores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1459--1470",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a framework for concurrency control and
                 availability in multi-datacenter datastores. While we
                 consider Google's Megastore as our motivating example,
                 we define general abstractions for key components,
                 making our solution extensible to any system that
                 satisfies the abstraction properties. We first develop
                 and analyze a transaction management and replication
                 protocol based on a straightforward implementation of
                 the Paxos algorithm. Our investigation reveals that
                 this protocol acts as a concurrency prevention
                 mechanism rather than a concurrency control mechanism.
                 We then propose an enhanced protocol called Paxos with
                 Combination and Promotion (Paxos-CP) that provides true
                 transaction concurrency while requiring the same per
                 instance message complexity as the basic Paxos
                 protocol. Finally, we compare the performance of Paxos
                 and Paxos-CP in a multi-datacenter experimental study,
                 and we demonstrate that Paxos-CP results in
                 significantly fewer aborted transactions than basic
                 Paxos.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cheung:2012:APD,
  author =       "Alvin Cheung and Samuel Madden and Owen Arden and
                 Andrew C. Myers",
  title =        "Automatic partitioning of database applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1471--1482",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database-backed applications are nearly ubiquitous in
                 our daily lives. Applications that make many small
                 accesses to the database create two challenges for
                 developers: increased latency and wasted resources from
                 numerous network round trips. A well-known technique to
                 improve transactional database application performance
                 is to convert part of the application into stored
                 procedures that are executed on the database server.
                 Unfortunately, this conversion is often difficult. In
                 this paper we describe Pyxis, a system that takes
                 database-backed applications and automatically
                 partitions their code into two pieces, one of which is
                 executed on the application server and the other on the
                 database server. Pyxis profiles the application and
                 server loads, statically analyzes the code's
                 dependencies, and produces a partitioning that
                 minimizes the number of control transfers as well as
                 the amount of data sent during each transfer. Our
                 experiments using TPC-C and TPC-W show that Pyxis is
                 able to generate partitions with up to 3x reduction in
                 latency and 1.7x improvement in throughput when
                 compared to a traditional non-partitioned
                 implementation and has comparable performance to that
                 of a custom stored procedure implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2012:CCE,
  author =       "Jiannan Wang and Tim Kraska and Michael J. Franklin
                 and Jianhua Feng",
  title =        "{CrowdER}: crowdsourcing entity resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1483--1494",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity resolution is central to data integration and
                 data cleaning. Algorithmic approaches have been
                 improving in quality, but remain far from perfect.
                 Crowdsourcing platforms offer a more accurate but
                 expensive (and slow) way to bring human insight into
                 the process. Previous work has proposed batching
                 verification tasks for presentation to human workers
                 but even with batching, a human-only approach is
                 infeasible for data sets of even moderate size, due to
                 the large numbers of matches to be tested. Instead, we
                 propose a hybrid human-machine approach in which
                 machines are used to do an initial, coarse pass over
                 all the data, and people are used to verify only the
                 most likely matching pairs. We show that for such a
                 hybrid system, generating the minimum number of
                 verification tasks of a given size is NP-Hard, but we
                 develop a novel two-tiered heuristic approach for
                 creating batched tasks. We describe this method, and
                 present the results of extensive experiments on real
                 data sets using a popular crowdsourcing platform. The
                 experiments show that our hybrid approach achieves both
                 good efficiency and high accuracy compared to
                 machine-only or human-only alternatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2012:WAJ,
  author =       "Caleb Chen Cao and Jieying She and Yongxin Tong and
                 Lei Chen",
  title =        "Whom to ask?: jury selection for decision making tasks
                 on micro-blog services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1495--1506",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "It is universal to see people obtain knowledge on
                 micro-blog services by asking others decision making
                 questions. In this paper, we study the Jury Selection
                  Problem (JSP) by utilizing crowdsourcing for decision
                 making tasks on micro-blog services. Specifically, the
                 problem is to enroll a subset of crowd under a limited
                 budget, whose aggregated wisdom via Majority Voting
                 scheme has the lowest probability of drawing a wrong
                  answer (Jury Error Rate, JER). Due to various individual
                 error-rates of the crowd, the calculation of JER is
                 non-trivial. Firstly, we explicitly state that JER is
                 the probability when the number of wrong jurors is
                 larger than half of the size of a jury. To avoid the
                 exponentially increasing calculation of JER, we propose
                 two efficient algorithms and an effective bounding
                 technique. Furthermore, we study the Jury Selection
                 Problem on two crowdsourcing models, one is for
                  altruistic users (AltrM) and the other is for
                  incentive-requiring users (PayM) who require extra
                 payment when enrolled into a task. For the AltrM model,
                 we prove the monotonicity of JER on individual error
                 rate and propose an efficient exact algorithm for JSP.
                 For the PayM model, we prove the NP-hardness of JSP on
                 PayM and propose an efficient greedy-based heuristic
                 algorithm. Finally, we conduct a series of experiments
                 to investigate the traits of JSP, and validate the
                 efficiency and effectiveness of our proposed algorithms
                 on both synthetic and real micro-blog data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2012:AAL,
  author =       "Xiaochun Yang and Honglei Liu and Bin Wang",
  title =        "{ALAE}: accelerating local alignment with affine gap
                 exactly in biosequence databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1507--1518",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of local alignment, which is
                 finding pairs of similar subsequences with gaps. The
                 problem exists in biosequence databases. BLAST is a
                 typical software for finding local alignment based on
                 heuristic, but could miss results. Using the
                 Smith-Waterman algorithm, we can find all local
                 alignments in $ O(m n) $ time, where $m$ and $n$ are
                 lengths of a query and a text, respectively. A recent
                 exact approach BWT-SW improves the complexity of the
                 Smith-Waterman algorithm under constraints, but still
                 much slower than BLAST. This paper takes on the
                 challenge of designing an accurate and efficient
                 algorithm for evaluating local-alignment searches,
                 especially for long queries. In this paper, we propose
                 an efficient software called ALAE to speed up BWT-SW
                 using a compressed suffix array. ALAE utilizes a family
                 of filtering techniques to prune meaningless
                 calculations and an algorithm for reusing score
                 calculations. We also give a mathematical analysis and
                 show that the upper bound of the total number of
                  calculated entries using ALAE could vary from $ 4.50
                  m n^{0.520} $ to $ 9.05 m n^{0.896} $ for random DNA
                  sequences and vary from $ 8.28 m n^{0.364} $ to $ 7.49
                  m n^{0.723} $ for random protein sequences. We
                 demonstrate the significant performance improvement of
                 ALAE on BWT-SW using a thorough experimental study on
                 real biosequences. ALAE guarantees correctness and
                 accelerates BLAST for most of parameters.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Candan:2012:SCD,
  author =       "K. Sel{\c{c}}uk Candan and Rosaria Rossini and Xiaolan
                 Wang and Maria Luisa Sapino",
  title =        "{sDTW}: computing {DTW} distances using locally
                 relevant constraints based on salient feature
                 alignments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1519--1530",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many applications generate and consume temporal data
                 and retrieval of time series is a key processing step
                 in many application domains. Dynamic time warping (DTW)
                 distance between time series of size N and M is
                 computed relying on a dynamic programming approach
                 which creates and fills an N x M grid to search for an
                 optimal warp path. Since this can be costly, various
                 heuristics have been proposed to cut away the
                 potentially unproductive portions of the DTW grid. In
                 this paper, we argue that time series often carry
                 structural features that can be used for identifying
                 locally relevant constraints to eliminate redundant
                 work. Relying on this observation, we propose salient
                 feature based sDTW algorithms which first identify
                 robust salient features in the given time series and
                 then find a consistent alignment of these to establish
                 the boundaries for the warp path search. More
                 specifically, we propose alternative fixed
                 core\&adaptive width, adaptive core\&fixed width, and
                 adaptive core\&adaptive width strategies which enforce
                 different constraints reflecting the high level
                 structural characteristics of the series in the data
                 set. Experiment results show that the proposed sDTW
                 algorithms help achieve much higher accuracy in DTW
                 computation and time series retrieval than fixed core
                 \& fixed width algorithms that do not leverage local
                 features of the given time series.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tauheed:2012:SPL,
  author =       "Farhan Tauheed and Thomas Heinis and Felix
                 Sch{\"u}rmann and Henry Markram and Anastasia
                 Ailamaki",
  title =        "{SCOUT}: prefetching for latent structure following
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1531--1542",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today's scientists are quickly moving from in vitro to
                 in silico experimentation: they no longer analyze
                 natural phenomena in a petri dish, but instead they
                 build models and simulate them. Managing and analyzing
                 the massive amounts of data involved in simulations is
                 a major task. Yet, they lack the tools to efficiently
                 work with data of this size. One problem many
                 scientists share is the analysis of the massive spatial
                 models they build. For several types of analysis they
                 need to interactively follow the structures in the
                 spatial model, e.g., the arterial tree, neuron fibers,
                 etc., and issue range queries along the way. Each query
                 takes long to execute, and the total time for executing
                 a sequence of queries significantly delays data
                 analysis. Prefetching the spatial data reduces the
                 response time considerably, but known approaches do not
                 prefetch with high accuracy. We develop SCOUT, a
                 structure-aware method for prefetching data along
                 interactive spatial query sequences. SCOUT uses an
                 approximate graph model of the structures involved in
                 past queries and attempts to identify what particular
                 structure the user follows. Our experiments with
                 neuro-science data show that SCOUT prefetches with an
                 accuracy from 71\% to 92\%, which translates to a
                 speedup of 4x-15x. SCOUT also improves the prefetching
                 accuracy on datasets from other scientific domains,
                 such as medicine and biology.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2012:API,
  author =       "Kaibo Wang and Yin Huai and Rubao Lee and Fusheng Wang
                 and Xiaodong Zhang and Joel H. Saltz",
  title =        "Accelerating pathology image data cross-comparison on
                 {CPU--GPU} hybrid systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1543--1554",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As an important application of spatial databases in
                 pathology imaging analysis, cross-comparing the spatial
                 boundaries of a huge amount of segmented micro-anatomic
                 objects demands extremely data- and compute-intensive
                 operations, requiring high throughput at an affordable
                 cost. However, the performance of spatial database
                 systems has not been satisfactory since their
                 implementations of spatial operations cannot fully
                 utilize the power of modern parallel hardware. In this
                 paper, we provide a customized software solution that
                 exploits GPUs and multi-core CPUs to accelerate spatial
                 cross-comparison in a cost-effective way. Our solution
                 consists of an efficient GPU algorithm and a pipelined
                 system framework with task migration support. Extensive
                 experiments with real-world data sets demonstrate the
                 effectiveness of our solution, which improves the
                 performance of spatial cross-comparison by over 18
                 times compared with a parallelized spatial database
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2012:RER,
  author =       "Jiexing Li and Arnd Christian K{\"o}nig and Vivek
                 Narasayya and Surajit Chaudhuri",
  title =        "Robust estimation of resource consumption for {SQL}
                 queries using statistical techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1555--1566",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ability to estimate resource consumption of SQL
                 queries is crucial for a number of tasks in a database
                 system such as admission control, query scheduling and
                 costing during query optimization. Recent work has
                 explored the use of statistical techniques for resource
                 estimation in place of the manually constructed cost
                 models used in query optimization. Such techniques,
                 which require as training data examples of resource
                 usage in queries, offer the promise of superior
                 estimation accuracy since they can account for factors
                 such as hardware characteristics of the system or bias
                 in cardinality estimates. However, the proposed
                 approaches lack robustness in that they do not
                 generalize well to queries that are different from the
                 training examples, resulting in significant estimation
                 errors. Our approach aims to address this problem by
                 combining knowledge of database query processing with
                 statistical models. We model resource-usage at the
                 level of individual operators, with different models
                 and features for each operator type, and explicitly
                 model the asymptotic behavior of each operator. This
                 results in significantly better estimation accuracy and
                 the ability to estimate resource usage of arbitrary
                 plans, even when they are very different from the
                 training instances. We validate our approach using
                 various large scale real-life and benchmark workloads
                 on Microsoft SQL Server.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Das:2012:WTW,
  author =       "Mahashweta Das and Saravanan Thirumuruganathan and
                 Sihem Amer-Yahia and Gautam Das and Cong Yu",
  title =        "Who tags what?: an analysis framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1567--1578",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The rise of Web 2.0 is signaled by sites such as
                 Flickr, del.icio.us, and YouTube, and social tagging is
                 essential to their success. A typical tagging action
                 involves three components, user, item (e.g., photos in
                 Flickr), and tags (i.e., words or phrases). Analyzing
                 how tags are assigned by certain users to certain items
                 has important implications in helping users search for
                 desired information. In this paper, we explore common
                 analysis tasks and propose a dual mining framework for
                 social tagging behavior mining. This framework is
                 centered around two opposing measures, similarity and
                 diversity, being applied to one or more tagging
                 components, and therefore enables a wide range of
                 analysis scenarios such as characterizing similar users
                 tagging diverse items with similar tags, or diverse
                 users tagging similar items with diverse tags, etc. By
                 adopting different concrete measures for similarity and
                 diversity in the framework, we show that a wide range
                 of concrete analysis problems can be defined and they
                 are NP-Complete in general. We design efficient
                 algorithms for solving many of those problems and
                 demonstrate, through comprehensive experiments over
                 real data, that our algorithms significantly
                 out-perform the exact brute-force approach without
                 compromising analysis result quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2012:GFE,
  author =       "Haohan Zhu and George Kollios and Vassilis Athitsos",
  title =        "A generic framework for efficient and effective
                 subsequence retrieval",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1579--1590",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper proposes a general framework for matching
                 similar subsequences in both time series and string
                 databases. The matching results are pairs of query
                 subsequences and database subsequences. The framework
                 finds all possible pairs of similar subsequences if the
                 distance measure satisfies the ``consistency''
                 property, which is a property introduced in this paper.
                 We show that most popular distance functions, such as
                  the Euclidean distance, DTW, ERP, the Fr{\'e}chet
                 distance for time series, and the Hamming distance and
                 Levenshtein distance for strings, are all
                 ``consistent''. We also propose a generic index
                 structure for metric spaces named ``reference net''.
                 The reference net occupies $ O(n) $ space, where $n$ is
                 the size of the dataset and is optimized to work well
                 with our framework. The experiments demonstrate the
                 ability of our method to improve retrieval performance
                 when combined with diverse distance measures. The
                 experiments also illustrate that the reference net
                 scales well in terms of space overhead and query
                 time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dittrich:2012:OAE,
  author =       "Jens Dittrich and Jorge-Arnulfo Quian{\'e}-Ruiz and
                 Stefan Richter and Stefan Schuh and Alekh Jindal and
                 J{\"o}rg Schad",
  title =        "Only aggressive elephants are fast elephants",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1591--1602",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Yellow elephants are slow. A major reason is that they
                 consume their inputs entirely before responding to an
                 elephant rider's orders. Some clever riders have
                 trained their yellow elephants to only consume parts of
                 the inputs before responding. However, the teaching
                 time to make an elephant do that is high. So high that
                 the teaching lessons often do not pay off. We take a
                 different approach. We make elephants aggressive; only
                 this will make them very fast. We propose HAIL (Hadoop
                 Aggressive Indexing Library), an enhancement of HDFS
                 and Hadoop MapReduce that dramatically improves
                 runtimes of several classes of MapReduce jobs. HAIL
                 changes the upload pipeline of HDFS in order to create
                 different clustered indexes on each data block replica.
                 An interesting feature of HAIL is that we typically
                 create a win-win situation: we improve both data upload
                 to HDFS and the runtime of the actual Hadoop MapReduce
                 job. In terms of data upload, HAIL improves over HDFS
                 by up to 60\% with the default replication factor of
                 three. In terms of query execution, we demonstrate that
                 HAIL runs up to 68x faster than Hadoop. In our
                 experiments, we use six clusters including physical and
                 EC2 clusters of up to 100 nodes. A series of
                 scalability experiments also demonstrates the
                 superiority of HAIL.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2012:MLP,
  author =       "Rui Li and Shengjie Wang and Kevin Chen-Chuan Chang",
  title =        "Multiple location profiling for users and
                 relationships from social network and content",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1603--1614",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Users' locations are important for many applications
                 such as personalized search and localized content
                 delivery. In this paper, we study the problem of
                 profiling Twitter users' locations with their following
                 network and tweets. We propose a multiple location
                 profiling model (MLP), which has three key features:
                 (1) it formally models how likely a user follows
                 another user given their locations and how likely a
                 user tweets a venue given his location, (2) it
                 fundamentally captures that a user has multiple
                 locations and his following relationships and tweeted
                 venues can be related to any of his locations, and some
                 of them are even noisy, and (3) it novelly utilizes the
                 home locations of some users as partial supervision. As
                 a result, MLP not only discovers users' locations
                 accurately and completely, but also ``explains'' each
                 following relationship by revealing users' true
                 locations in the relationship. Experiments on a
                 large-scale data set demonstrate those advantages.
                 Particularly, (1) for predicting users' home locations,
                 MLP successfully places 62\% users and out-performs two
                 state-of-the-art methods by 10\% in accuracy, (2) for
                 discovering users' multiple locations, MLP improves the
                 baseline methods by 14\% in recall, and (3) for
                 explaining following relationships, MLP achieves 57\%
                 accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kang:2012:FBE,
  author =       "Woon-Hak Kang and Sang-Won Lee and Bongki Moon",
  title =        "Flash-based extended cache for higher throughput and
                 faster recovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1615--1626",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Considering the current price gap between disk and
                 flash memory drives, for applications dealing with
                 large scale data, it will be economically more sensible
                 to use flash memory drives to supplement disk drives
                 rather than to replace them. This paper presents FaCE,
                 which is a new low-overhead caching strategy that uses
                 flash memory as an extension to the DRAM buffer. FaCE
                 aims at improving the transaction throughput as well as
                 shortening the recovery time from a system failure. To
                 achieve the goals, we propose two novel algorithms for
                 flash cache management, namely, Multi-Version FIFO
                 replacement and Group Second Chance. One striking
                 result from FaCE is that using a small flash memory
                 drive as a caching device could deliver even higher
                 throughput than using a large flash memory drive to
                 store the entire database tables. This was possible due
                 to flash write optimization as well as disk access
                 reduction obtained by the FaCE caching methods. In
                 addition, FaCE takes advantage of the non-volatility of
                 flash memory to fully support database recovery by
                 extending the scope of a persistent database to include
                 the data pages stored in the flash cache. We have
                 implemented FaCE in the PostgreSQL open source database
                 server and demonstrated its effectiveness for TPC-C
                 benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bender:2012:DTH,
  author =       "Michael A. Bender and Martin Farach-Colton and Rob
                 Johnson and Russell Kraner and Bradley C. Kuszmaul and
                 Dzejla Medjedovic and Pablo Montes and Pradeep Shetty
                 and Richard P. Spillane and Erez Zadok",
  title =        "Don't thrash: how to cache your hash on flash",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1627--1637",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents new alternatives to the well-known
                 Bloom filter data structure. The Bloom filter, a
                 compact data structure supporting set insertion and
                 membership queries, has found wide application in
                 databases, storage systems, and networks. Because the
                 Bloom filter performs frequent random reads and writes,
                 it is used almost exclusively in RAM, limiting the size
                 of the sets it can represent. This paper first
                 describes the quotient filter, which supports the basic
                 operations of the Bloom filter, achieving roughly
                 comparable performance in terms of space and time, but
                 with better data locality. Operations on the quotient
                 filter require only a small number of contiguous
                 accesses. The quotient filter has other advantages over
                 the Bloom filter: it supports deletions, it can be
                 dynamically resized, and two quotient filters can be
                 efficiently merged. The paper then gives two data
                 structures, the buffered quotient filter and the
                 cascade filter, which exploit the quotient filter
                 advantages and thus serve as SSD-optimized alternatives
                 to the Bloom filter. The cascade filter has better
                 asymptotic I/O performance than the buffered quotient
                 filter, but the buffered quotient filter outperforms
                 the cascade filter on small to medium data sets. Both
                 data structures significantly outperform
                 recently-proposed SSD-optimized Bloom filter variants,
                 such as the elevator Bloom filter, buffered Bloom
                 filter, and forest-structured Bloom filter. In
                 experiments, the cascade filter and buffered quotient
                 filter performed insertions 8.6--11 times faster than
                 the fastest Bloom filter variant and performed lookups
                 0.94--2.56 times faster.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Isele:2012:LEL,
  author =       "Robert Isele and Christian Bizer",
  title =        "Learning expressive linkage rules using genetic
                 programming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1638--1649",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A central problem in data integration and data
                 cleansing is to find entities in different data sources
                 that describe the same real-world object. Many existing
                 methods for identifying such entities rely on explicit
                 linkage rules which specify the conditions that
                 entities must fulfill in order to be considered to
                 describe the same real-world object. In this paper, we
                 present the GenLink algorithm for learning expressive
                 linkage rules from a set of existing reference links
                 using genetic programming. The algorithm is capable of
                 generating linkage rules which select discriminative
                 properties for comparison, apply chains of data
                 transformations to normalize property values, choose
                 appropriate distance measures and thresholds and
                 combine the results of multiple comparisons using
                 non-linear aggregation functions. Our experiments show
                 that the GenLink algorithm outperforms the
                 state-of-the-art genetic programming approach to
                 learning linkage rules recently presented by Carvalho
                  et al. and is capable of learning linkage rules which
                 achieve a similar accuracy as human written rules for
                 the same problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tong:2012:MFI,
  author =       "Yongxin Tong and Lei Chen and Yurong Cheng and Philip
                 S. Yu",
  title =        "Mining frequent itemsets over uncertain databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1650--1661",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, due to the wide applications of
                 uncertain data, mining frequent itemsets over uncertain
                 databases has attracted much attention. In uncertain
                 databases, the support of an itemset is a random
                 variable instead of a fixed occurrence counting of this
                 itemset. Thus, unlike the corresponding problem in
                 deterministic databases where the frequent itemset has
                 a unique definition, the frequent itemset under
                 uncertain environments has two different definitions so
                 far. The first definition, referred as the expected
                 support-based frequent itemset, employs the expectation
                 of the support of an itemset to measure whether this
                 itemset is frequent. The second definition, referred as
                 the probabilistic frequent itemset, uses the
                 probability of the support of an itemset to measure its
                 frequency. Thus, existing work on mining frequent
                 itemsets over uncertain databases is divided into two
                 different groups and no study is conducted to
                 comprehensively compare the two different definitions.
                 In addition, since no uniform experimental platform
                 exists, current solutions for the same definition even
                 generate inconsistent results. In this paper, we
                 firstly aim to clarify the relationship between the two
                 different definitions. Through extensive experiments,
                 we verify that the two definitions have a tight
                 connection and can be unified together when the size of
                 data is large enough. Secondly, we provide baseline
                 implementations of eight existing representative
                 algorithms and test their performances with uniform
                 measures fairly. Finally, according to the fair tests
                 over many different benchmark data sets, we clarify
                 several existing inconsistent conclusions and discuss
                 some new findings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dallachiesa:2012:UTS,
  author =       "Michele Dallachiesa and Besmira Nushi and Katsiaryna
                 Mirylenka and Themis Palpanas",
  title =        "Uncertain time-series similarity: return to the
                 basics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1662--1673",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the last years there has been a considerable
                 increase in the availability of continuous sensor
                 measurements in a wide range of application domains,
                 such as Location-Based Services (LBS), medical
                 monitoring systems, manufacturing plants and
                 engineering facilities to ensure efficiency, product
                 quality and safety, hydrologic and geologic observing
                 systems, pollution management, and others. Due to the
                 inherent imprecision of sensor observations, many
                 investigations have recently turned into querying,
                 mining and storing uncertain data. Uncertainty can also
                 be due to data aggregation, privacy-preserving
                 transforms, and error-prone mining algorithms. In this
                 study, we survey the techniques that have been proposed
                 specifically for modeling and processing uncertain time
                 series, an important model for temporal data. We
                 provide an analytical evaluation of the alternatives
                 that have been proposed in the literature, highlighting
                 the advantages and disadvantages of each approach, and
                 further compare these alternatives with two additional
                 techniques that were carefully studied before. We
                 conduct an extensive experimental evaluation with 17
                 real datasets, and discuss some surprising results,
                 which suggest that a fruitful research direction is to
                 take into account the temporal correlations in the time
                 series. Based on our evaluations, we also provide
                 guidelines useful for the practitioners in the field.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dasu:2012:SDC,
  author =       "Tamraparni Dasu and Ji Meng Loh",
  title =        "Statistical distortion: consequences of data
                 cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1674--1683",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce the notion of statistical distortion as
                 an essential metric for measuring the effectiveness of
                 data cleaning strategies. We use this metric to propose
                 a widely applicable yet scalable experimental framework
                 for evaluating data cleaning strategies along three
                 dimensions: glitch improvement, statistical distortion
                 and cost-related criteria. Existing metrics focus on
                 glitch improvement and cost, but not on the statistical
                 impact of data cleaning strategies. We illustrate our
                 framework on real world data, with a comprehensive
                 suite of experiments and analyses.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lang:2012:TEE,
  author =       "Willis Lang and Stavros Harizopoulos and Jignesh M.
                 Patel and Mehul A. Shah and Dimitris Tsirogiannis",
  title =        "Towards energy-efficient database cluster design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "11",
  pages =        "1684--1695",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:15 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Energy is a growing component of the operational cost
                 for many ``big data'' deployments, and hence has become
                 increasingly important for practitioners of large-scale
                 data analysis who require scale-out clusters or
                 parallel DBMS appliances. Although a number of recent
                 studies have investigated the energy efficiency of
                 DBMSs, none of these studies have looked at the
                 architectural design space of energy-efficient parallel
                 DBMS clusters. There are many challenges to increasing
                 the energy efficiency of a DBMS cluster, including
                 dealing with the inherent scaling inefficiency of
                 parallel data processing, and choosing the appropriate
                 energy-efficient hardware. In this paper, we
                 experimentally examine and analyze a number of key
                 parameters related to these challenges for designing
                 energy-efficient database clusters. We explore the
                 cluster design space using empirical results and
                 propose a model that considers the key bottlenecks to
                 energy efficiency in a parallel DBMS. This paper
                 represents a key first step in designing
                 energy-efficient database clusters, which is
                 increasingly important given the trend toward parallel
                 database appliances.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jensen:2012:DMS,
  author =       "Christian S. Jensen",
  title =        "Data management on the spatial web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1696--1696",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Due in part to the increasing mobile use of the web
                 and the proliferation of geo-positioning, the web is
                 fast acquiring a significant spatial aspect. Content
                 and users are being augmented with locations that are
                 used increasingly by location-based services. Studies
                 suggest that each week, several billion web queries are
                 issued that have local intent and target spatial web
                 objects. These are points of interest with a web
                 presence, and they thus have locations as well as
                 textual descriptions. This development has given
                 prominence to spatial web data management, an area ripe
                 with new and exciting opportunities and challenges. The
                 research community has embarked on inventing and
                 supporting new query functionality for the spatial web.
                 Different kinds of spatial web queries return objects
                 that are near a location argument and are relevant to a
                 text argument. To support such queries, it is important
                 to be able to rank objects according to their relevance
                 to a query. And it is important to be able to process
                 the queries with low latency. The talk offers an
                 overview of key aspects of the spatial web. Based on
                 recent results obtained by the speaker and his
                 colleagues, the talk explores new query functionality
                 enabled by the setting. Further, the talk offers
                 insight into the data management techniques capable of
                 supporting such functionality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dietrich:2012:DAO,
  author =       "Brenda Dietrich",
  title =        "Data analytics opportunities in a smarter planet",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1697--1697",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "New applications of computing are being enabled by
                 instrumentation of physical entities, aggregation of
                 data, and the analysis of the data. The resulting
                 integration of information and control permits
                 efficient and effective management of complex man-made
                 systems. Examples include transportation systems,
                 buildings, electrical grids, health care systems,
                 governments, and supply chains. Achieving this vision
                 requires extensive data integration and analysis, over
                 diverse, rapidly changing, and often uncertain data.
                 There are many challenges, requiring both new data
                 management techniques as well as new mathematics,
                 forcing new collaborations as the basis of the new
                 ``Data Science''. Needs and opportunities will be
                 discussed in the context of specific pilots and
                 projects.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sahin:2012:CEM,
  author =       "Kenan Sahin",
  title =        "Challenges in economic massive content storage and
                 management ({MCSAM}) in the era of self-organizing,
                 self-expanding and self-linking data clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1698--1698",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Rapid spread of social networks, global on-line
                 shopping, post 9/11 security oriented linking of data
                 bases and foremost the global adoption of smart
                 phones/devices, among other phenomena, are transforming
                 data clusters into dynamic and almost uncontrollable
                 entities that have their own local intelligence,
                 clients and objectives. The scale and rapidity of
                 change is such that large scale innovations in content
                 storage and management are urgently needed if the
                 diseconomies of scale and complexity are to be
                 mitigated. The field needs to reinvent itself.
                 Istanbul, a city that has reinvented itself many times
                 is an excellent venue to engage in such a discussion
                 and for me to offer suggestions and proposals that
                 derive from personal experiences that span academia,
                 start ups, R\&D firms and Bell Labs as well my early
                 years spent in Istanbul.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Manku:2012:AFC,
  author =       "Gurmeet Singh Manku and Rajeev Motwani",
  title =        "Approximate frequency counts over data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1699--1699",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Research in data stream algorithms has blossomed since
                 late 90s. The talk will trace the history of the
                 Approximate Frequency Counts paper, how it was
                 conceptualized and how it influenced data stream
                 research. The talk will also touch upon a recent
                 development: analysis of personal data streams for
                 improving our quality of lives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hellerstein:2012:MAL,
  author =       "Joseph M. Hellerstein and Christopher R{\'e} and
                 Florian Schoppmann and Daisy Zhe Wang and Eugene
                 Fratkin and Aleksander Gorajek and Kee Siong Ng and
                 Caleb Welton and Xixuan Feng and Kun Li and Arun
                 Kumar",
  title =        "The {MADlib} analytics library: or {MAD} skills, the
                 {SQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1700--1711",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MADlib is a free, open-source library of in-database
                 analytic methods. It provides an evolving suite of
                 SQL-based algorithms for machine learning, data mining
                 and statistics that run at scale within a database
                 engine, with no need for data import/export to other
                 tools. The goal is for MADlib to eventually serve a
                 role for scalable database systems that is similar to
                 the CRAN library for R: a community repository of
                 statistical methods, this time written with scale and
                 parallelism in mind. In this paper we introduce the
                 MADlib project, including the background that led to
                 its beginnings, and the motivation for its open-source
                 nature. We provide an overview of the library's
                 architecture and design patterns, and provide a
                 description of various statistical methods in that
                 context. We include performance and speedup results of
                 a core design pattern from one of those methods over
                 the Greenplum parallel DBMS on a modest-sized test
                 cluster. We then report on two initial efforts at
                 incorporating academic research into MADlib, which is
                 one of the project's goals. MADlib is freely available
                 at http://madlib.net, and the project is open for
                 contributions of both new methods, and ports to
                 additional database platforms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Floratou:2012:CEH,
  author =       "Avrilia Floratou and Nikhil Teletia and David J.
                 DeWitt and Jignesh M. Patel and Donghui Zhang",
  title =        "Can the elephants handle the {NoSQL} onslaught?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1712--1723",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this new era of ``big data'', traditional DBMSs are
                 under attack from two sides. At one end of the
                 spectrum, the use of document store NoSQL systems (e.g.
                 MongoDB) threatens to move modern Web 2.0 applications
                 away from traditional RDBMSs. At the other end of the
                 spectrum, big data DSS analytics that used to be the
                 domain of parallel RDBMSs is now under attack by
                 another class of NoSQL data analytics systems, such as
                 Hive on Hadoop. So, are the traditional RDBMSs, aka
                 ``big elephants'', doomed as they are challenged from
                 both ends of this ``big data'' spectrum? In this paper,
                 we compare one representative NoSQL system from each
                 end of this spectrum with SQL Server, and analyze the
                 performance and scalability aspects of each of these
                 approaches (NoSQL vs. SQL) on two workloads (decision
                 support analysis and interactive data-serving) that
                 represent the two ends of the application spectrum. We
                 present insights from this evaluation and speculate on
                 potential trends for the future.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rabl:2012:SBD,
  author =       "Tilmann Rabl and Sergio G{\'o}mez-Villamor and
                 Mohammad Sadoghi and Victor Munt{\'e}s-Mulero and
                 Hans-Arno Jacobsen and Serge Mankovskii",
  title =        "Solving big data challenges for enterprise application
                 performance management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1724--1735",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As the complexity of enterprise systems increases, the
                 need for monitoring and analyzing such systems also
                 grows. A number of companies have built sophisticated
                 monitoring tools that go far beyond simple resource
                 utilization reports. For example, based on
                 instrumentation and specialized APIs, it is now
                 possible to monitor single method invocations and trace
                 individual transactions across geographically
                 distributed systems. This high-level of detail enables
                 more precise forms of analysis and prediction but comes
                 at the price of high data rates (i.e., big data). To
                 maximize the benefit of data monitoring, the data has
                 to be stored for an extended period of time for
                 ulterior analysis. This new wave of big data analytics
                 imposes new challenges especially for the application
                 performance monitoring systems. The monitoring data has
                 to be stored in a system that can sustain the high data
                 rates and at the same time enable an up-to-date view of
                 the underlying infrastructure. With the advent of
                 modern key--value stores, a variety of data storage
                 systems have emerged that are built with a focus on
                 scalability and high data rates as predominant in this
                 monitoring use case. In this work, we present our
                 experience and a comprehensive performance evaluation
                 of six modern (open-source) data stores in the context
                 of application performance monitoring as part of CA
                 Technologies initiative. We evaluated these systems
                 with data and workloads that can be found in
                 application performance monitoring, as well as, on-line
                 advertisement, power monitoring, and many other use
                 cases. We present our insights not only as performance
                 results but also as lessons learned and our experience
                 relating to the setup and configuration complexity of
                 these data stores in an industry setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shinnar:2012:MIP,
  author =       "Avraham Shinnar and David Cunningham and Vijay
                 Saraswat and Benjamin Herta",
  title =        "{M3R}: increased performance for in-memory {Hadoop}
                 jobs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1736--1747",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Main Memory Map Reduce (M3R) is a new implementation
                 of the Hadoop Map Reduce (HMR) API targeted at online
                 analytics on high mean-time-to-failure clusters. It
                 does not support resilience, and supports only those
                 workloads which can fit into cluster memory. In return,
                 it can run HMR jobs unchanged --- including jobs
                 produced by compilers for higher-level languages such
                 as Pig, Jaql, and SystemML and interactive front-ends
                 like IBM BigSheets --- while providing significantly
                 better performance than the Hadoop engine on several
                 workloads (e.g. 45x on some input sizes for sparse
                 matrix vector multiply). M3R also supports extensions
                 to the HMR API which can enable Map Reduce jobs to run
                 faster on the M3R engine, while not affecting their
                 performance under the Hadoop engine.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rosch:2012:SAH,
  author =       "Philipp R{\"o}sch and Lars Dannecker and Franz
                 F{\"a}rber and Gregor Hackenbroich",
  title =        "A storage advisor for hybrid-store databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1748--1758",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the SAP HANA database, SAP offers a
                 high-performance in-memory hybrid-store database.
                 Hybrid-store databases---that is, databases supporting
                 row- and column-oriented data management---are getting
                 more and more prominent. While the columnar management
                 offers high-performance capabilities for analyzing
                 large quantities of data, the row-oriented store can
                 handle transactional point queries as well as inserts
                 and updates more efficiently. To effectively take
                 advantage of both stores at the same time the novel
                 question whether to store the given data row- or
                 column-oriented arises. We tackle this problem with a
                 storage advisor tool that supports database
                 administrators at this decision. Our proposed storage
                 advisor recommends the optimal store based on data and
                 query characteristics; its core is a cost model to
                 estimate and compare query execution times for the
                 different stores. Besides a per-table decision, our
                 tool also considers to horizontally and vertically
                 partition the data and manage the partitions on
                 different stores. We evaluated the storage advisor for
                 the use in the SAP HANA database; we show the
                 recommendation quality as well as the benefit of having
                 the data in the optimal store with respect to increased
                 query performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Switakowski:2012:CSP,
  author =       "Micha{\l} {\'S}witakowski and Peter Boncz and Marcin
                 Zukowski",
  title =        "From cooperative scans to predictive buffer
                 management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1759--1770",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In analytical applications, database systems often
                 need to sustain workloads with multiple concurrent
                 scans hitting the same table. The Cooperative Scans
                 (CScans) framework, which introduces an Active Buffer
                 Manager (ABM) component into the database architecture,
                 has been the most effective and elaborate response to
                 this problem, and was initially developed in the X100
                 research prototype. We now report on the experiences of
                 integrating Cooperative Scans into its
                 industrial-strength successor, the Vectorwise database
                 product. During this implementation we invented a
                 simpler optimization of concurrent scan buffer
                 management, called Predictive Buffer Management (PBM).
                 PBM is based on the observation that in a workload with
                 long-running scans, the buffer manager has quite a bit
                 of information on the workload in the immediate future,
                 such that an approximation of the ideal OPT algorithm
                 becomes feasible. In the evaluation on both synthetic
                 benchmarks as well as a TPC-H throughput run we compare
                 the benefits of naive buffer management (LRU) versus
                 CScans, PBM and OPT; showing that PBM achieves benefits
                 close to Cooperative Scans, while incurring much lower
                 architectural impact.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2012:ULI,
  author =       "George Lee and Jimmy Lin and Chuang Liu and Andrew
                 Lorek and Dmitriy Ryaboy",
  title =        "The unified logging infrastructure for data analytics
                 at {Twitter}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1771--1780",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, there has been a substantial amount
                 of work on large-scale data analytics using
                 Hadoop-based platforms running on large clusters of
                 commodity machines. A less-explored topic is how those
                 data, dominated by application logs, are collected and
                 structured to begin with. In this paper, we present
                 Twitter's production logging infrastructure and its
                 evolution from application-specific logging to a
                 unified ``client events'' log format, where messages
                 are captured in common, well-formatted, flexible Thrift
                 messages. Since most analytics tasks consider the user
                 session as the basic unit of analysis, we
                 pre-materialize ``session sequences'', which are
                 compact summaries that can answer a large class of
                 common queries quickly. The development of this
                 infrastructure has streamlined log collection and data
                 analysis, thereby improving our ability to rapidly
                 experiment and iterate on various aspects of the
                 service.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Talius:2012:TLB,
  author =       "Tomas Talius and Robin Dhamankar and Andrei Dumitrache
                 and Hanuma Kodavalla",
  title =        "Transaction log based application error recovery and
                 point in-time query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1781--1789",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database backups have traditionally been used as the
                 primary mechanism to recover from hardware and user
                 errors. High availability solutions maintain redundant
                 copies of data that can be used to recover from most
                 failures except user or application errors. Database
                 backups are neither space nor time efficient for
                 recovering from user errors which typically occur in
                 the recent past and affect a small portion of the
                 database. Moreover periodic full backups impact user
                 workload and increase storage costs. In this paper we
                 present a scheme that can be used for both user and
                 application error recovery starting from the current
                 state and rewinding the database back in time using the
                 transaction log. While we provide a consistent view of
                 the entire database as of a point in time in the past,
                 the actual prior versions are produced only for data
                 that is accessed. We make the as of data accessible to
                 arbitrary point in time queries by integrating with the
                 database snapshot feature in Microsoft SQL Server.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lamb:2012:VAD,
  author =       "Andrew Lamb and Matt Fuller and Ramakrishna
                 Varadarajan and Nga Tran and Ben Vandiver and Lyric
                 Doshi and Chuck Bear",
  title =        "The {Vertica Analytic Database}: {C-Store} 7 years
                 later",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1790--1801",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper describes the system architecture of the
                 Vertica Analytic Database (Vertica), a
                 commercialization of the design of the C-Store research
                 prototype. Vertica demonstrates a modern commercial
                 RDBMS system that presents a classical relational
                 interface while at the same time achieving the high
                 performance expected from modern ``web scale'' analytic
                 systems by making appropriate architectural choices.
                 Vertica is also an instructive lesson in how academic
                 systems research can be directly commercialized into a
                 successful product.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2012:IAP,
  author =       "Yanpei Chen and Sara Alspaugh and Randy Katz",
  title =        "Interactive analytical processing in big data systems:
                 a cross-industry study of {MapReduce} workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1802--1813",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Within the past few years, organizations in diverse
                 industries have adopted MapReduce-based systems for
                 large-scale data processing. Along with these new
                 users, important new workloads have emerged which
                 feature many small, short, and increasingly interactive
                 jobs in addition to the large, long-running batch jobs
                 for which MapReduce was originally designed. As
                 interactive, large-scale query processing is a strength
                 of the RDBMS community, it is important that lessons
                 from that field be carried over and applied where
                 possible in this new domain. However, these new
                 workloads have not yet been described in the
                 literature. We fill this gap with an empirical analysis
                 of MapReduce traces from six separate business-critical
                 deployments inside Facebook and at Cloudera customers
                 in e-commerce, telecommunications, media, and retail.
                 Our key contribution is a characterization of new
                 MapReduce workloads which are driven in part by
                 interactive analysis, and which make heavy use of
                 query-like programming frameworks on top of MapReduce.
                 These workloads display diverse behaviors which
                 invalidate prior assumptions about MapReduce such as
                 uniform data access, regular diurnal patterns, and
                 prevalence of large jobs. A secondary contribution is a
                 first step towards creating a TPC-like data processing
                 benchmark for MapReduce.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lam:2012:MMS,
  author =       "Wang Lam and Lu Liu and Sts Prasad and Anand Rajaraman
                 and Zoheb Vacheri and AnHai Doan",
  title =        "{Muppet}: {MapReduce}-style processing of fast data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1814--1825",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MapReduce has emerged as a popular method to process
                 big data. In the past few years, however, not just big
                 data, but fast data has also exploded in volume and
                 availability. Examples of such data include sensor data
                 streams, the Twitter Firehose, and Facebook updates.
                 Numerous applications must process fast data. Can we
                 provide a MapReduce-style framework so that developers
                 can quickly write such applications and execute them
                 over a cluster of machines, to achieve low latency and
                 high scalability? In this paper we report on our
                 investigation of this question, as carried out at
                 Kosmix and WalmartLabs. We describe MapUpdate, a
                 framework like MapReduce, but specifically developed
                 for fast data. We describe Muppet, our implementation
                 of MapUpdate. Throughout the description we highlight
                 the key challenges, argue why MapReduce is not well
                 suited to address them, and briefly describe our
                 current solutions. Finally, we describe our experience
                 and lessons learned with Muppet, which has been used
                 extensively at Kosmix and WalmartLabs to power a broad
                 range of applications in social media and e-commerce.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jacques-Silva:2012:BUD,
  author =       "Gabriela Jacques-Silva and Bugra Gedik and Rohit Wagle
                 and Kun-Lung Wu and Vibhore Kumar",
  title =        "Building user-defined runtime adaptation routines for
                 stream processing applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1826--1837",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream processing applications are deployed as
                 continuous queries that run from the time of their
                 submission until their cancellation. This deployment
                 mode limits developers who need their applications to
                 perform runtime adaptation, such as algorithmic
                 adjustments, incremental job deployment, and
                 application-specific failure recovery. Currently,
                 developers do runtime adaptation by using external
                 scripts and/or by inserting operators into the stream
                 processing graph that are unrelated to the data
                 processing logic. In this paper, we describe a
                 component called orchestrator that allows users to
                 write routines for automatically adapting the
                 application to runtime conditions. Developers build an
                 orchestrator by registering and handling events as well
                 as specifying actuations. Events can be generated due
                 to changes in the system state (e.g., application
                 component failures), built-in system metrics (e.g.,
                 throughput of a connection), or custom application
                 metrics (e.g., quality score). Once the orchestrator
                 receives an event, users can take adaptation actions by
                 using the orchestrator actuation APIs. We demonstrate
                 the use of the orchestrator in IBM's System S in the
                 context of three different applications, illustrating
                 application adaptation to changes on the incoming data
                 distribution, to application failures, and on-demand
                 dynamic composition.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2012:MSP,
  author =       "Junchen Jiang and Hongji Bao and Edward Y. Chang and
                 Yuqian Li",
  title =        "{MOIST}: a scalable and parallel moving object indexer
                 with school tracking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1838--1849",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Location-Based Service (LBS) is rapidly becoming the
                 next ubiquitous technology for a wide range of mobile
                 applications. To support applications that demand
                 nearest-neighbor and history queries, an LBS spatial
                 indexer must be able to efficiently update, query,
                 archive and mine location records, which can be in
                 contention with each other. In this work, we propose
                 MOIST, whose baseline is a recursive spatial
                 partitioning indexer built upon BigTable. To reduce
                 update and query contention, MOIST groups nearby
                 objects of similar trajectory into the same school, and
                 keeps track of only the history of school leaders. This
                 dynamic clustering scheme can eliminate redundant
                 updates and hence reduce update latency. To improve
                 history query processing, MOIST keeps some history data
                 in memory, while it flushes aged data onto parallel
                 disks in a locality-preserving way. Through
                 experimental studies, we show that MOIST can support
                 highly efficient nearest-neighbor and history queries
                 and can scale well with an increasing number of users
                 and update frequency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ports:2012:SSI,
  author =       "Dan R. K. Ports and Kevin Grittner",
  title =        "Serializable snapshot isolation in {PostgreSQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1850--1861",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper describes our experience implementing
                 PostgreSQL's new serializable isolation level. It is
                 based on the recently-developed Serializable Snapshot
                 Isolation (SSI) technique. This is the first
                 implementation of SSI in a production database release
                 as well as the first in a database that did not
                 previously have a lock-based serializable isolation
                 level. We reflect on our experience and describe how we
                 overcame some of the resulting challenges, including
                 the implementation of a new lock manager, a technique
                 for ensuring memory usage is bounded, and integration
                 with other PostgreSQL features. We also introduce an
                 extension to SSI that improves performance for
                 read-only transactions. We evaluate PostgreSQL's
                 serializable isolation level using several benchmarks
                 and show that it achieves performance only slightly
                 below that of snapshot isolation, and significantly
                 outperforms the traditional two-phase locking approach
                 on read-intensive workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Murthy:2012:EEU,
  author =       "Karin Murthy and Prasad M. Deshpande and Atreyee Dey
                 and Ramanujam Halasipuram and Mukesh Mohania and P.
                 Deepak and Jennifer Reed and Scott Schumacher",
  title =        "Exploiting evidence from unstructured data to enhance
                 master data management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1862--1873",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Master data management (MDM) integrates data from
                 multiple structured data sources and builds a
                 consolidated 360-degree view of business entities such
                 as customers and products. Today's MDM systems are not
                 prepared to integrate information from unstructured
                 data sources, such as news reports, emails, call-center
                 transcripts, and chat logs. However, those unstructured
                 data sources may contain valuable information about the
                 same entities known to MDM from the structured data
                 sources. Integrating information from unstructured data
                 into MDM is challenging as textual references to
                 existing MDM entities are often incomplete and
                 imprecise and the additional entity information
                 extracted from text should not impact the
                 trustworthiness of MDM data. In this paper, we present
                 an architecture for making MDM text-aware and showcase
                 its implementation as IBM InfoSphere MDM Extension for
                 Unstructured Text Correlation, an add-on to IBM
                 InfoSphere Master Data Management Standard Edition. We
                 highlight how MDM benefits from additional evidence
                 found in documents when doing entity resolution and
                 relationship discovery. We experimentally demonstrate
                 the feasibility of integrating information from
                 unstructured data sources into MDM.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2012:AOW,
  author =       "Lili Wu and Roshan Sumbaly and Chris Riccomini and
                 Gordon Koo and Hyung Jin Kim and Jay Kreps and Sam
                 Shah",
  title =        "{Avatara}: {OLAP} for web-scale analytics products",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1874--1877",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multidimensional data generated by members on websites
                 has seen massive growth in recent years. OLAP is a
                 well-suited solution for mining and analyzing this
                 data. Providing insights derived from this analysis has
                 become crucial for these websites to give members
                 greater value. For example, LinkedIn, the largest
                 professional social network, provides its professional
                 members rich analytics features like ``Who's Viewed My
                 Profile?'' and ``Who's Viewed This Job?'' The data
                 behind these features form cubes that must be
                 efficiently served at scale, and can be neatly sharded
                 to do so. To serve our growing 160 million member base,
                 we built a scalable and fast OLAP serving system called
                 Avatara to solve this many, small cubes problem. At
                 LinkedIn, Avatara has been powering several analytics
                 features on the site for the past two years.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kolb:2012:DED,
  author =       "Lars Kolb and Andreas Thor and Erhard Rahm",
  title =        "{Dedoop}: efficient deduplication with {Hadoop}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1878--1881",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate a powerful and easy-to-use tool called
                 Dedoop (Deduplication with Hadoop) for MapReduce-based
                 entity resolution (ER) of large datasets. Dedoop
                 supports a browser-based specification of complex ER
                 workflows including blocking and matching steps as well
                 as the optional use of machine learning for the
                 automatic generation of match classifiers. Specified
                 workflows are automatically translated into MapReduce
                 jobs for parallel execution on different Hadoop
                 clusters. To achieve high performance Dedoop supports
                 several advanced load balancing strategies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2012:MBD,
  author =       "Xiufeng Liu and Christian Thomsen and Torben Bach
                 Pedersen",
  title =        "{MapReduce}-based dimensional {ETL} made easy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1882--1885",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper demonstrates ETLMR, a novel dimensional
                 Extract--Transform--Load (ETL) programming framework
                 that uses Map-Reduce to achieve scalability. ETLMR has
                 built-in native support of data warehouse (DW) specific
                 constructs such as star schemas, snowflake schemas, and
                 slowly changing dimensions (SCDs). This makes it
                 possible to build MapReduce-based dimensional ETL flows
                 very easily. The ETL process can be configured with
                 only few lines of code. We will demonstrate the
                 concrete steps in using ETLMR to load data into a
                 (partly snowflaked) DW schema. This includes
                 configuration of data sources and targets, dimension
                 processing schemes, fact processing, and deployment. In
                 addition, we also present the scalability on large data
                 sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2012:CIE,
  author =       "Huiqi Xu and Zhen Li and Shumin Guo and Keke Chen",
  title =        "{CloudVista}: interactive and economical visual
                 cluster analysis for big data in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1886--1889",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analysis of big data has become an important problem
                 for many business and scientific applications, among
                 which clustering and visualizing clusters in big data
                 raise some unique challenges. This demonstration
                 presents the CloudVista prototype system to address the
                 problems with big data caused by using existing data
                 reduction approaches. It promotes a whole-big-data
                 visualization approach that preserves the details of
                 clustering structure. The prototype system has several
                 merits. (1) Its visualization model is naturally
                 parallel, which guarantees the scalability. (2) The
                 visual frame structure minimizes the data transferred
                 between the cloud and the client. (3) The RandGen
                 algorithm is used to achieve a good balance between
                 interactivity and batch processing. (4) This approach
                 is also designed to minimize the financial cost of
                 interactive exploration in the cloud. The demonstration
                 will highlight the problems with existing approaches
                 and show the advantages of the CloudVista approach. The
                 viewers will have the chance to play with the
                 CloudVista prototype system and compare the
                 visualization results generated with different
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alexandrov:2012:MSE,
  author =       "Alexander Alexandrov and Kostas Tzoumas and Volker
                 Markl",
  title =        "{Myriad}: scalable and expressive data generation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1890--1893",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The current research focus on Big Data systems calls
                 for a rethinking of data generation methods. The
                 traditional sequential data generation approach is not
                 well suited to large-scale systems as generating a
                 terabyte of data may require days or even weeks
                 depending on the number of constraints imposed on the
                 generated model. We demonstrate Myriad, a new data
                 generation toolkit that enables the specification of
                 semantically rich data generator programs that can
                 scale out linearly in a shared-nothing environment.
                 Data generation programs built on top of Myriad
                 implement an efficient parallel execution strategy
                 leveraged by the extensive use of pseudo-random number
                 generators with random access support.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2012:DDC,
  author =       "Eugene Wu and Samuel Madden and Michael Stonebraker",
  title =        "A demonstration of {DBWipes}: clean as you query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1894--1897",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As data analytics becomes mainstream, and the
                 complexity of the underlying data and computation
                 grows, it will be increasingly important to provide
                 tools that help analysts understand the underlying
                 reasons when they encounter errors in the result. While
                 data provenance has been a large step in providing
                 tools to help debug complex workflows, its current form
                 has limited utility when debugging aggregation
                 operators that compute a single output from a large
                 collection of inputs. Traditional provenance will
                 return the entire input collection, which has very low
                 precision. In contrast, users are seeking precise
                 descriptions of the inputs that caused the errors. We
                 propose a Ranked Provenance System, which identifies
                 subsets of inputs that influenced the output error,
                 describes each subset with human readable predicates
                 and orders them by contribution to the error. In this
                 demonstration, we will present DBWipes, a novel data
                 cleaning system that allows users to execute aggregate
                 queries, and interactively detect, understand, and
                 clean errors in the query results. Conference attendees
                 will explore anomalies in campaign donations from the
                 current US presidential election and in readings from a
                 54-node sensor deployment.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alsubaiee:2012:AOS,
  author =       "Sattam Alsubaiee and Yasser Altowim and Hotham
                 Altwaijry and Alexander Behm and Vinayak Borkar and
                 Yingyi Bu and Michael Carey and Raman Grover and
                 Zachary Heilbron and Young-Seok Kim and Chen Li and
                 Nicola Onose and Pouria Pirzadeh and Rares Vernica and
                 Jian Wen",
  title =        "{ASTERIX}: an open source system for ``Big {Data}''
                 management and analysis (demo)",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1898--1901",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "At UC Irvine, we are building a next generation
                 parallel database system, called ASTERIX, as our
                 approach to addressing today's ``Big Data'' management
                 challenges. ASTERIX aims to combine time-tested
                 principles from parallel database systems with those of
                 the Web-scale computing community, such as fault
                 tolerance for long running jobs. In this demo, we
                 present a whirlwind tour of ASTERIX, highlighting a few
                 of its key features. We will demonstrate examples of
                 our data definition language to model semi-structured
                 data, and examples of interesting queries using our
                 declarative query language. In particular, we will show
                 the capabilities of ASTERIX for answering geo-spatial
                 queries and fuzzy queries, as well as ASTERIX' data
                 feed construct for continuously ingesting data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agarwal:2012:BDI,
  author =       "Sameer Agarwal and Anand P. Iyer and Aurojit Panda and
                 Samuel Madden and Barzan Mozafari and Ion Stoica",
  title =        "Blink and it's done: interactive queries on very large
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1902--1905",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration, we present BlinkDB, a massively
                 parallel, sampling-based approximate query processing
                 framework for running interactive queries on large
                 volumes of data. The key observation in BlinkDB is that
                 one can make reasonable decisions in the absence of
                 perfect answers. BlinkDB extends the Hive/HDFS stack
                 and can handle the same set of SPJA (selection,
                 projection, join and aggregate) queries as supported by
                 these systems. BlinkDB provides real-time answers along
                 with statistical error guarantees, and can scale to
                 petabytes of data and thousands of machines in a
                 fault-tolerant manner. Our experiments using the TPC-H
                 benchmark and on an anonymized real-world video content
                 distribution workload from Conviva Inc. show that
                 BlinkDB can execute a wide range of queries up to 150x
                 faster than Hive on MapReduce and 10--150x faster than
                 Shark (Hive on Spark) over tens of terabytes of data
                 stored across 100 machines, all with an error of
                 2--10\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Roy:2012:MGD,
  author =       "Abhishek Roy and Yanlei Diao and Evan Mauceli and
                 Yiping Shen and Bai-Lin Wu",
  title =        "Massive genomic data processing and deep analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1906--1909",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today large sequencing centers are producing genomic
                 data at the rate of 10 terabytes a day and require
                 complicated processing to transform massive amounts of
                 noisy raw data into biological information. To address
                 these needs, we develop a system for end-to-end
                 processing of genomic data, including alignment of
                 short read sequences, variation discovery, and deep
                 analysis. We also employ a range of quality control
                 mechanisms to improve data quality and parallel
                 processing techniques for performance. In the demo, we
                 will use real genomic data to show details of data
                 transformation through the workflow, the usefulness of
                 end results (ready for use as testable hypotheses), the
                 effects of our quality control mechanisms and improved
                 algorithms, and finally performance improvement.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liarou:2012:MDO,
  author =       "Erietta Liarou and Stratos Idreos and Stefan Manegold
                 and Martin Kersten",
  title =        "{MonetDB\slash DataCell}: online analytics in a
                 streaming column-store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1910--1913",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In DataCell, we design streaming functionalities in a
                 modern relational database kernel which targets big
                 data analytics. This includes exploitation of both its
                 storage/execution engine and its optimizer
                 infrastructure. We investigate the opportunities and
                 challenges that arise with such a direction and we show
                 that it carries significant advantages for modern
                 applications in need for online analytics such as web
                 logs, network monitoring and scientific data
                 management. The major challenge then becomes the
                 efficient support for specialized stream features,
                 e.g., multi-query processing and incremental
                 window-based processing as well as exploiting standard
                 DBMS functionalities in a streaming environment such as
                 indexing. This demo presents DataCell, an extension of
                 the MonetDB open-source column-store for online
                 analytics. The demo gives users the opportunity to
                 experience the features of DataCell such as processing
                 both stream and persistent data and performing window
                 based processing. The demo provides a visual interface
                 to monitor the critical system components, e.g., how
                 query plans transform from typical DBMS query plans to
                 online query plans, how data flows through the query
                 plans as the streams evolve, how DataCell maintains
                 intermediate results in columnar form to avoid repeated
                 evaluation of the same stream portions, etc. The demo
                 also provides the ability to interactively set the test
                 scenarios and various DataCell knobs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2012:SSE,
  author =       "Xin Cao and Gao Cong and Christian S. Jensen and Jun
                 Jie Ng and Beng Chin Ooi and Nhan-Tue Phan and Dingming
                 Wu",
  title =        "{SWORS}: a system for the efficient retrieval of
                 relevant spatial web objects",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1914--1917",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Spatial web objects that possess both a geographical
                 location and a textual description are gaining in
                 prevalence. This gives prominence to spatial keyword
                 queries that exploit both location and textual
                 arguments. Such queries are used in many web services
                 such as yellow pages and maps services. We present
                 SWORS, the Spatial Web Object Retrieval System, that is
                 capable of efficiently retrieving spatial web objects
                 that satisfy spatial keyword queries. Specifically,
                 SWORS supports two types of queries: (a) the
                 location-aware top-$k$ text retrieval (L$k$T) query
                 that retrieves $k$ individual spatial web objects
                 taking into account query location proximity and text
                 relevancy; (b) the spatial keyword group (SKG) query
                 that retrieves a group of objects that cover the query
                 keywords and are nearest to the query location and have
                 the shortest inter-object distances. SWORS provides
                 browser-based interfaces for desktop and laptop
                 computers and provides a client application for mobile
                 devices. The interfaces and the client enable users to
                 formulate queries and view the query results on a map.
                 The server side stores the data and processes the
                 queries. We use three real-life data sets to
                 demonstrate the functionality and performance of
                 SWORS.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Morishima:2012:CCD,
  author =       "Atsuyuki Morishima and Norihide Shinagawa and Tomomi
                 Mitsuishi and Hideto Aoki and Shun Fukusumi",
  title =        "{CyLog\slash Crowd4U}: a declarative platform for
                 complex data-centric crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1918--1921",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This demo presents a principled approach to the
                 problems of data-centric human/machine computations
                 with Crowd4U, a crowdsourcing platform equipped with a
                 suite of tools for rapid development of crowdsourcing
                 applications. Using the demo, we show that declarative
                 database abstraction can be used as a powerful tool to
                 design, implement, and analyze data-centric
                 crowdsourcing applications. The power of Crowd4U comes
                 from CyLog, a database abstraction that handles complex
                 data-centric human/machine computations. CyLog is a
                 Datalog-like language that incorporates a principled
                 feedback system for humans at the language level so
                 that the semantics of the computation not closed in
                 machines can be defined based on game theory. We
                 believe that the demo clearly shows that database
                 abstraction can be a promising basis for designing
                 complex data-centric applications requiring
                 human/machine computations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Silva:2012:EDS,
  author =       "Yasin N. Silva and Spencer Pearson",
  title =        "Exploiting database similarity joins for metric
                 spaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1922--1925",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Similarity Joins are recognized among the most useful
                 data processing and analysis operations and are
                 extensively used in multiple application domains. They
                 retrieve all data pairs whose distances are smaller
                 than a predefined threshold $ \epsilon $. Multiple
                 Similarity Join algorithms and implementation
                 techniques have been proposed. They range from
                 out-of-database approaches for only in-memory and
                 external memory data to techniques that make use of
                 standard database operators to answer similarity joins.
                 Recent work has shown that this operation can be
                 efficiently implemented as a physical database
                 operator. However, the proposed operator only supports
                 1D numeric data. This paper presents DBSimJoin, a
                 physical Similarity Join database operator for datasets
                 that lie in any metric space. DBSimJoin is a
                 non-blocking operator that prioritizes the early
                 generation of results. We implemented the proposed
                 operator in PostgreSQL, an open source database system.
                 We show how this operator can be used in multiple
                 real-world data analysis scenarios with multiple data
                 types and distance functions. Particularly, we show the
                 use of DBSimJoin to identify similar images represented
                 as feature vectors, and similar publications in a
                 bibliographic database. We also show that DBSimJoin
                 scales very well when important parameters, e.g., $ \epsilon $,
                 data size, increase.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gawade:2012:SPI,
  author =       "Mrunal Gawade and Martin Kersten",
  title =        "{Stethoscope}: a platform for interactive visual
                 analysis of query execution plans",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1926--1929",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Searching for the performance bottleneck in an
                 execution trace is an error prone and time consuming
                 activity. Existing tools offer some comfort by
                 providing a visual representation of trace for
                 analysis. In this paper we present the Stethoscope, an
                 interactive visual tool to inspect and analyze columnar
                 database query performance, both online and offline.
                 Its unique interactive animated interface capitalizes
                 on the large data-flow graph representation of a query
                 execution plan, augmented with query execution trace
                 information. We demonstrate features of Stethoscope for
                 both online and offline analysis of long running
                 queries. It helps in understanding where time goes, how
                 optimizers perform, and how parallel processing on
                 multi-core systems is exploited.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kotsifakos:2012:HSS,
  author =       "Alexios Kotsifakos and Panagiotis Papapetrou and
                 Jaakko Hollm{\'e}n and Dimitrios Gunopulos and Vassilis
                 Athitsos and George Kollios",
  title =        "{Hum-a-song}: a subsequence matching with
                 gaps-range-tolerances query-by-humming system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1930--1933",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present ``Hum-a-song'', a system built for music
                 retrieval, and particularly for the Query-By-Humming
                 (QBH) application. According to QBH, the user is able
                 to hum a part of a song that she recalls and would like
                 to learn what this song is, or find other songs similar
                 to it in a large music repository. We present a simple
                 yet efficient approach that maps the problem to time
                 series subsequence matching. The query and the database
                 songs are represented as 2-dimensional time series
                 conveying information about the pitch and the duration
                 of the notes. Then, since the query is a short sequence
                 and we want to find its best match that may start and
                 end anywhere in the database, subsequence matching
                 methods are suitable for this task. In this demo, we
                 present a system that employs and exposes to the user a
                 variety of state-of-the-art dynamic programming
                 methods, including a newly proposed efficient method
                 named SMBGT that is robust to noise and considers all
                 intrinsic problems in QBH; it allows variable tolerance
                 levels when matching elements, where tolerances are
                 defined as functions of the compared sequences, gaps in
                 both the query and target sequences, and bounds the
                 matching length and (optionally) the minimum number of
                 matched elements. Our system is intended to become open
                 source, which is to the best of our knowledge the first
                 non-commercial effort trying to solve QBH with a
                 variety of methods, and that also approaches the
                 problem from the time series perspective.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kwon:2012:SAM,
  author =       "YongChul Kwon and Magdalena Balazinska and Bill Howe
                 and Jerome Rolia",
  title =        "{SkewTune} in action: mitigating skew in {MapReduce}
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1934--1937",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate SkewTune, a system that automatically
                 mitigates skew in user-defined MapReduce programs and
                 is a drop-in replacement for Hadoop. The demonstration
                 has two parts. First, we demonstrate how SkewTune
                 mitigates skew in real MapReduce applications at
                 runtime by running a real application in a public
                 cloud. Second, through an interactive graphical
                 interface, we demonstrate the details of the skew
                 mitigation process using both real and synthetic
                 workloads that represent various skew configurations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abouzied:2012:PQS,
  author =       "Azza Abouzied and Joseph M. Hellerstein and Avi
                 Silberschatz",
  title =        "Playful query specification with {DataPlay}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1938--1941",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "DataPlay is a query tool that encourages a
                 trial-and-error approach to query specification.
                 DataPlay uses a graphical query language to make a
                 particularly challenging query specification task ---
                 quantification --- easier. It constrains the relational
                 data model to enable the presentation of non-answers,
                 in addition to answers, to aid query interpretation.
                 Two novel features of DataPlay are suggesting semantic
                 variations to a query and correcting queries by
                 example. We introduce DataPlay as a sophisticated query
                 specification tool and demonstrate its unique
                 interaction models.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alagiannis:2012:NAA,
  author =       "Ioannis Alagiannis and Renata Borovica and Miguel
                 Branco and Stratos Idreos and Anastasia Ailamaki",
  title =        "{NoDB} in action: adaptive query processing on raw
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1942--1945",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As data collections become larger and larger, users
                 are faced with increasing bottlenecks in their data
                 analysis. More data means more time to prepare the
                 data, to load the data into the database and to execute
                 the desired queries. Many applications already avoid
                 using traditional database systems, e.g., scientific
                 data analysis and social networks, due to their
                 complexity and the increased data-to-query time, i.e.
                 the time between getting the data and retrieving its
                 first useful results. For many applications data
                 collections keep growing fast, even on a daily basis,
                 and this data deluge will only increase in the future,
                 where it is expected to have much more data than what
                 we can move or store, let alone analyze. In this
                 demonstration, we will showcase a new philosophy for
                 designing database systems called NoDB. NoDB aims at
                 minimizing the data-to-query time, most prominently by
                 removing the need to load data before launching
                 queries. We will present our prototype implementation,
                 PostgresRaw, built on top of PostgreSQL, which allows
                 for efficient query execution over raw data files with
                 zero initialization overhead. We will visually
                 demonstrate how PostgresRaw incrementally and
                 adaptively touches, parses, caches and indexes raw data
                 files autonomously and exclusively as a side-effect of
                 user queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wenzel:2012:CPQ,
  author =       "Florian Wenzel and Markus Endres and Stefan Mandl and
                 Werner Kie{\ss}ling",
  title =        "Complex preference queries supporting spatial
                 applications for user groups",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1946--1949",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Our demo application demonstrates a personalized
                 location-based web application using Preference SQL
                 that allows single users as well as groups of users to
                 find accommodations in Istanbul that satisfy both hard
                 constraints and user preferences. The application
                 assists in defining spatial, numerical, and categorical
                 base preferences and composes complex preference
                 statements in an intuitive fashion. Unlike existing
                 location-based services, the application considers
                 spatial queries as soft instead of hard constraints to
                 determine the best matches which are finally presented
                 on a map. The underlying Preference SQL framework is
                 implemented on top of a database, therefore enabling a
                 seamless application integration with standard SQL
                 back-end systems as well as efficient and extensible
                 preference query processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bakibayev:2012:DFQ,
  author =       "Nurzhan Bakibayev and Dan Olteanu and Jakub
                 Z{\'a}vodn{\'y}",
  title =        "Demonstration of the {FDB} query engine for factorised
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1950--1953",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "FDB is an in-memory query engine for factorised
                 databases, which are relational databases that use
                 compact factorised representations at the physical
                 layer to reduce data redundancy and boost query
                 performance. We demonstrate FDB using real data sets
                 from IMDB, DBLP, and the NELL repository of facts
                 learned from Web pages. The users can inspect
                 factorisations as well as plans used by FDB to compute
                 factorised results of select-project-join queries on
                 factorised databases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2012:PRD,
  author =       "Zichen Xu and Yi-Cheng Tu and Xiaorui Wang",
  title =        "{PET}: reducing database energy cost via query
                 optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1954--1957",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Energy conservation is an increasingly important issue
                 in designing modern database management systems (DBMS).
                 This requires a deep thinking about the tradeoffs
                 between energy and performance. Despite the significant
                 amount of efforts at the hardware level to make the
                 major components consume less energy, we argue for a
                 revisit of the DBMS query processing mechanism to
                 identify and harvest the potential of energy saving.
                 However, the state-of-art architecture of DBMS does not
                 take energy usage into consideration in its design. A
                 major challenge in developing an energy-aware DBMS is
                 to design and implement a cost-based query optimizer
                 that evaluates query plans by both performance and
                 energy costs. By following such a strategy, our
                 previous work revealed the fact that energy-efficient
                 query plans do not necessarily have the shortest
                 processing time. This demo proposal introduces PET ---
                 an energy-aware query optimization framework that is
                 built as a part of the PostgreSQL kernel. PET, via its
                 power cost estimation module and plan evaluation model,
                 enables the database system to run under a
                 DBA-specified energy/performance tradeoff level. PET
                 contains a power cost estimator that can accurately
                 estimate the power cost of query plans at compile time,
                 and a query evaluation engine that the DBA could
                 configure key PET parameters towards the desired
                 tradeoff. The software to be demonstrated will also
                 include workload engine for producing large quantities
                 of queries and data sets. Our demonstration will show
                 how PET functions via a comprehensive set of views from
                 its graphical user interface named PET Viewer. Through
                 such interfaces, a user can achieve a good
                 understanding of the energy-related query optimization
                 and cost-based plan generation. Users are also allowed
                 to interact with PET to experience the different
                 energy/performance tradeoffs by changing PET and
                 workload parameters at query runtime.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Letelier:2012:SSA,
  author =       "Andr{\'e}s Letelier and Jorge P{\'e}rez and Reinhard
                 Pichler and Sebastian Skritek",
  title =        "{SPAM}: a {SPARQL} analysis and manipulation tool",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1958--1961",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SQL developers are used to having elaborate tools
                 which help them in writing queries. In contrast, the
                 creation of tools to assist users in the development of
                 SPARQL queries is still in its infancy. In this system
                 demo, we present the SPARQL Analysis and Manipulation
                 (SPAM) tool, which provides help for the development of
                 SPARQL queries. The main features of the SPAM tool
                 comprise an editor with both text and graphical
                 interface, as well as various functions for the static
                 and dynamic analysis of SPARQL queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koutris:2012:QDP,
  author =       "Paraschos Koutris and Prasang Upadhyaya and Magdalena
                 Balazinska and Bill Howe and Dan Suciu",
  title =        "{QueryMarket} demonstration: pricing for online data
                 markets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1962--1965",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Increasingly data is being bought and sold online. To
                 facilitate such transactions, online data market-places
                 have emerged to provide a service for sellers to price
                 views on their data, and buyers to buy such views.
                 These marketplaces neither support the sale of ad-hoc
                 queries (that are not one of the specified views), nor
                 do they support queries that join datasets. We present
                 QueryMarket, a prototype data marketplace that
                 automatically extrapolates prices to ad-hoc queries,
                 including those with joins, from the manually priced
                 views. We call this capability ``query-based pricing''
                 and describe how it is superior to existing pricing
                 methods, and how it provides more flexible pricing for
                 the sellers. We then show how QueryMarket implements
                 query-based pricing and how it generates explanations
                 for the prices it computes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Luo:2012:DSD,
  author =       "Siqiang Luo and Yifeng Luo and Shuigeng Zhou and Gao
                 Cong and Jihong Guan",
  title =        "{DISKs}: a system for distributed spatial group
                 keyword search on road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1966--1969",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query (e.g., shortest path) on road networks has been
                 extensively studied. Although most of the existing
                 query processing approaches are designed for
                 centralized environments, there is a growing need to
                 handle queries on road networks in distributed
                 environments due to the increasing query workload and
                 the challenge of querying large networks. In this
                 demonstration, we showcase a distributed system called
                 {DISKs} (DIstributed Spatial Keyword search) that is
                 capable of efficiently supporting spatial group keyword
                 search (S-GKS) on road networks. Given a group of
                 keywords $X$ and a distance $r$, an S-GKS returns
                 locations on a road network, such that for each
                 returned location $p$, there exists a set of nodes (on
                 the road network), which are located within a network
                 distance $r$ from $p$ and collectively contains $X$. We
                 will demonstrate the innovative modules, performance
                 and interactive user interfaces of DISKs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Endrullis:2012:WEM,
  author =       "Stefan Endrullis and Andreas Thor and Erhard Rahm",
  title =        "{WETSUIT}: an efficient mashup tool for searching and
                 fusing web entities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1970--1973",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate a new powerful mashup tool called
                 WETSUIT (Web EnTity Search and fUsIon Tool) to search
                 and integrate web data from diverse sources and
                 domain-specific entity search engines. WETSUIT supports
                 adaptive search strategies to query sets of relevant
                 entities with a minimum of communication overhead.
                 Mashups can be composed using a set of high-level
                 operators based on the Java-compatible language Scala.
                 The operator implementation supports a high degree of
                 parallel processing, in particular a streaming of
                 entities between all data transformation operations
                 facilitating a fast presentation of intermediate
                 results. WETSUIT has already been applied to solve
                 challenging integration tasks from different domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: TimeTravel, model-based integrated querying of
%%% past and forecasted time-series values in PostgreSQL
%%% (PVLDB 5(12):1974--1977, August 2012).
@Article{Khalefa:2012:MBI,
  author =       "Mohamed E. Khalefa and Ulrike Fischer and Torben Bach
                 Pedersen and Wolfgang Lehner",
  title =        "Model-based integration of past \& future in
                 {TimeTravel}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1974--1977",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate TimeTravel, an efficient DBMS system
                 for seamless integrated querying of past and
                 (forecasted) future values of time series, allowing the
                 user to view past and future values as one joint time
                 series. This functionality is important for advanced
                 application domain like energy. The main idea is to
                 compactly represent time series as models. By using
                 models, the TimeTravel system answers queries
                 approximately on past and future data with error
                 guarantees (absolute error and confidence) one order of
                 magnitude faster than when accessing the time series
                 directly. In addition, it efficiently supports exact
                 historical queries by only accessing relevant portions
                 of the time series. This is unlike existing approaches,
                 which access the entire time series to exactly answer
                 the query. To realize this system, we propose a novel
                 hierarchical model index structure. As real-world time
                 series usually exhibits seasonal behavior, models in
                 this index incorporate seasonality. To construct a
                 hierarchical model index, the user specifies
                 seasonality period, error guarantees levels, and a
                 statistical forecast method. As time proceeds, the
                 system incrementally updates the index and utilizes it
                 to answer approximate and exact queries. TimeTravel is
                 implemented into PostgreSQL, thus achieving complete
                 user transparency at the query level. In the demo, we
                 show the easy building of a hierarchical model index
                 for a real-world time series and the effect of varying
                 the error guarantees on the speed up of approximate and
                 exact queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: DrillBeyond, SQL querying over a local database
%%% combined with Open Data web sources (PVLDB 5(12):1978--1981,
%%% August 2012).
@Article{Eberius:2012:DEB,
  author =       "Julian Eberius and Maik Thiele and Katrin Braunschweig
                 and Wolfgang Lehner",
  title =        "{DrillBeyond}: enabling business analysts to explore
                 the {Web of Open Data}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1978--1981",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Following the Open Data trend, governments and public
                 agencies have started making their data available on
                 the Web and established platforms such as data.gov or
                 data.un.org. These Open Data platforms provide a huge
                 amount of data for various topics such as demographics,
                 transport, finance or health in various data formats.
                 One typical usage scenario for this kind of data is
                 their integration into a database or data warehouse in
                 order to apply data analytics. However, in today's
                 business intelligence tools there is an evident lack of
                 support for so-called situational or ad-hoc data
                 integration. In this demonstration we will therefore
                 present DrillBeyond, a novel database and information
                 retrieval engine which allows users to query a local
                 database as well as the Web of Open Data in a seamless
                 and integrated way with standard SQL. The audience will
                 be able to pose queries to our DrillBeyond system which
                 will be answered partly from local data in the database
                 and partly from datasets that originate from the Web of
                 Data. We will show how such queries are divided into
                 known and unknown parts and how missing attributes are
                 mapped to open datasets. We will demonstrate the
                 integration of the open datasets back into the DBMS in
                 order to apply its analytical features.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: PATTY, automatic learning of typed semantic
%%% relation patterns from web text (PVLDB 5(12):1982--1985,
%%% August 2012).
@Article{Nakashole:2012:DER,
  author =       "Ndapandula Nakashole and Gerhard Weikum and Fabian
                 Suchanek",
  title =        "Discovering and exploring relations on the web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1982--1985",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose a demonstration of PATTY, a system for
                 learning semantic relationships from the Web. PATTY is
                 a collection of relations learned automatically from
                 text. It aims to be to patterns what WordNet is to
                 words. The semantic types of PATTY relations enable
                 advanced search over subject-predicate-object data.
                 With the ongoing trends of enriching Web data (both
                 text and tables) with entity-relationship-oriented
                 semantic annotations, we believe a demo of the PATTY
                 system will be of interest to the database community.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: MapRat, exploration and geo-visualization of
%%% collaborative rating data (PVLDB 5(12):1986--1989, August 2012).
@Article{Thirumuruganathan:2012:MME,
  author =       "Saravanan Thirumuruganathan and Mahashweta Das and
                 Shrikant Desai and Sihem Amer-Yahia and Gautam Das and
                 Cong Yu",
  title =        "{MapRat}: meaningful explanation, interactive
                 exploration and geo-visualization of collaborative
                 ratings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1986--1989",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Collaborative rating sites such as IMDB and Yelp have
                 become rich resources that users consult to form
                 judgments about and choose from among competing items.
                 Most of these sites either provide a plethora of
                 information for users to interpret all by themselves or
                 a simple overall aggregate information. Such aggregates
                 (e.g., average rating over all users who have rated an
                 item, aggregates along pre-defined dimensions, etc.)
                 can not help a user quickly decide the desirability of
                 an item. In this paper, we build a system MapRat that
                 allows a user to explore multiple carefully chosen
                 aggregate analytic details over a set of user
                 demographics that meaningfully explain the ratings
                 associated with item(s) of interest. MapRat allows a
                 user to systematically explore, visualize and
                 understand user rating patterns of input item(s) so as
                 to make an informed decision quickly. In the demo,
                 participants are invited to explore collaborative movie
                 ratings for popular movies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: Deco, declarative crowdsourcing via SQL over
%%% crowd-gathered and relational data (PVLDB 5(12):1990--1993,
%%% August 2012).
@Article{Park:2012:DSD,
  author =       "Hyunjung Park and Hector Garcia-Molina and Richard
                 Pang and Neoklis Polyzotis and Aditya Parameswaran and
                 Jennifer Widom",
  title =        "{Deco}: a system for declarative crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1990--1993",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Deco is a system that enables declarative
                 crowdsourcing: answering SQL queries posed over data
                 gathered from the crowd as well as existing relational
                 data. Deco implements a novel push-pull hybrid
                 execution model in order to support a flexible data
                 model and a precise query semantics, while coping with
                 the combination of latency, monetary cost, and
                 uncertainty of crowdsourcing. We demonstrate Deco using
                 two crowdsourcing platforms: Amazon Mechanical Turk and
                 an in-house platform, to show how Deco provides a
                 convenient means of collecting and querying
                 crowdsourced data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: BonXai, a user-friendly front-end language for
%%% developing and analyzing XML Schema Definitions
%%% (PVLDB 5(12):1994--1997, August 2012).
@Article{Martens:2012:DAX,
  author =       "Wim Martens and Matthias Niewerth and Frank Neven and
                 Thomas Schwentick",
  title =        "Developing and analyzing {XSDs} through {BonXai}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1994--1997",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "BonXai is a versatile schema specification language
                 expressively equivalent to XML Schema. It is not
                 intended as a replacement for XML Schema but it can
                 serve as an additional, user-friendly front-end. It
                 offers a simple way and a lightweight syntax to specify
                 the context of elements based on regular expressions
                 rather than on types. In this demo we show the
                 front-end capabilities of BonXai and exemplify its
                 potential to offer a novel way to view existing XML
                 Schema Definitions. In particular, we present several
                 usage scenarios specifically targeted to showcase the
                 ease of specifying, modifying, and understanding XML
                 Schema Definitions through BonXai.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: InfoPuzzle, group decision making / consensus
%%% in mobile peer-to-peer databases (PVLDB 5(12):1998--2001,
%%% August 2012).
%%% NOTE(review): the abstract below reads "We rely of distinct
%%% counting"; this may be a transcription typo for "rely on" --
%%% verify against the published PVLDB abstract before emending.
@Article{Elmore:2012:IEG,
  author =       "Aaron J. Elmore and Sudipto Das and Divyakant Agrawal
                 and Amr {El Abbadi}",
  title =        "{InfoPuzzle}: exploring group decision making in
                 mobile peer-to-peer databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "1998--2001",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As Internet-based services and mobile computing
                 devices, such as smartphones and tablets, become
                 ubiquitous, society's reliance on them to accomplish
                 critical and time-sensitive tasks, such as information
                 dissemination and collaborative decision making, also
                 increases. Dependence on these media magnifies the
                 damage caused by their disruption, whether malicious or
                 natural. For instance, a natural disaster disrupting
                 cellular and Internet infrastructures impedes
                 information spread, which in turn leads to chaos, both
                 among the victims as well as the aid providers.
                 Decentralized and ad-hoc mechanisms for information
                 dissemination and decision making are paramount to help
                 restore order. We demonstrate InfoPuzzle, a mobile
                 peer-to-peer database that utilizes direct device
                 communication to enable group decision making, or
                 consensus, without reliance on centralized
                 communication services. InfoPuzzle minimizes the
                 system's resource consumption, to prolong the lifetime
                 of the power constrained devices by minimizing
                 communication overhead, computational complexity, and
                 persistent storage size. Due to user mobility and the
                 limited range of point-to-point communication, knowing
                 the exact number of participants is impossible, and
                 therefore traditional consensus or quorum protocols
                 cannot be used. We rely of distinct counting
                 techniques, probabilistic thresholds, and bounded time
                 based approaches to reach agreement. In this demo, we
                 will explore various challenges and heuristics in
                 estimating group participation to aid users in
                 reconciling consensus without centralized services.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: managing and querying generic moving objects
%%% (outdoor and indoor trajectories) in the SECONDO system
%%% (PVLDB 5(12):2002--2005, August 2012).
@Article{Xu:2012:MQG,
  author =       "Jianqiu Xu and Ralf Hartmut G{\"u}ting",
  title =        "Manage and query generic moving objects in {SECONDO}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2002--2005",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration, we introduce a system that is
                 able to manage moving objects in all real world
                 environments, e.g., road network, bus network and
                 indoor. The complete trip of a person is managed by the
                 system such as Walk, Car, Walk, and Indoor, where the
                 precise locations of both outdoor and indoor movements
                 are represented. Trajectories located in several
                 environments are integrated into the same framework.
                 The system supports the shortest path searching for
                 start and end locations being in different
                 environments, for example, from a room to a bus stop. A
                 comprehensive and scalable set of moving objects is
                 generated to simulate human movement in practice.
                 Optimization methods are developed to efficiently
                 answer novel queries regarding transportation modes and
                 mobile environments. Most of these queries are not
                 supported by existing methods because of the limitation
                 of data representation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: Chronos, temporal record linkage for history
%%% discovery in bibliographic data (PVLDB 5(12):2006--2009,
%%% August 2012).
@Article{Li:2012:CFH,
  author =       "Pei Li and Christina Tziviskou and Haidong Wang and
                 Xin Luna Dong and Xiaoguang Liu and Andrea Maurino and
                 Divesh Srivastava",
  title =        "{Chronos}: facilitating history discovery by linking
                 temporal records",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2006--2009",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many data sets contain temporal records over a long
                 period of time; each record is associated with a time
                 stamp and describes some aspects of a real-world entity
                 at that particular time. From such data, users often
                 wish to search for entities in a particular period and
                 understand the history of one entity or all entities in
                 the data set. A major challenge for enabling such
                 search and exploration is to identify records that
                 describe the same real-world entity over a long period
                 of time; however, linking temporal records is hard
                 given that the values that describe an entity can
                 evolve over time (e.g., a person can move from one
                 affiliation to another). We demonstrate the Chronos
                 system which offers users the useful tool for finding
                 real-world entities over time and understanding history
                 of entities in the bibliography domain. The core of
                 Chronos is a temporal record-linkage algorithm, which
                 is tolerant to value evolution over time. Our algorithm
                 can obtain an F-measure of over 0.9 in linking author
                 records and fix errors made by DBLP. We show how
                 Chronos allows users to explore the history of authors,
                 and how it helps users understand our linkage results
                 by comparing our results with those of existing
                 systems, highlighting differences in the results,
                 explaining our decisions to users, and answering
                 ``what-if'' questions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: TELEIOS, a database-powered virtual Earth
%%% observatory built on MonetDB and Semantic Web technologies
%%% (PVLDB 5(12):2010--2013, August 2012).
@Article{Koubarakis:2012:TDP,
  author =       "Manolis Koubarakis and Mihai Datcu and Charalambos
                 Kontoes and Ugo {Di Giammatteo} and Stefan Manegold and
                 Eva Klien",
  title =        "{TELEIOS}: a database-powered virtual earth
                 observatory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2010--2013",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "TELEIOS is a recent European project that addresses
                 the need for scalable access to petabytes of Earth
                 Observation data and the discovery and exploitation of
                 knowledge that is hidden in them. TELEIOS builds on
                 scientific database technologies (array databases,
                 SciQL, data vaults) and Semantic Web technologies
                 (stRDF and stSPARQL) implemented on top of a state of
                 the art column store database system (MonetDB). We
                 demonstrate a first prototype of the TELEIOS Virtual
                 Earth Observatory (VEO) architecture, using a forest
                 fire monitoring application as example.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Tutorial (2 pages): efficient big-data processing techniques for
%%% Hadoop MapReduce (PVLDB 5(12):2014--2015, August 2012).
@Article{Dittrich:2012:EBD,
  author =       "Jens Dittrich and Jorge-Arnulfo Quian{\'e}-Ruiz",
  title =        "Efficient big data processing in {Hadoop MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2014--2015",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This tutorial is motivated by the clear need of many
                 organizations, companies, and researchers to deal with
                 big data volumes efficiently. Examples include web
                 analytics applications, scientific applications, and
                 social networks. A popular data processing engine for
                 big data is Hadoop MapReduce. Early versions of Hadoop
                 MapReduce suffered from severe performance problems.
                 Today, this is becoming history. There are many
                 techniques that can be used with Hadoop MapReduce jobs
                 to boost performance by orders of magnitude. In this
                 tutorial we teach such techniques. First, we will
                 briefly familiarize the audience with Hadoop MapReduce
                 and motivate its use for big data processing. Then, we
                 will focus on different data management techniques,
                 going from job optimization to physical data
                 organization like data layouts and indexes. Throughout
                 this tutorial, we will highlight the similarities and
                 differences between Hadoop MapReduce and Parallel DBMS.
                 Furthermore, we will point out unresolved research
                 problems and open issues.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Tutorial (2 pages): MapReduce algorithm design for big-data
%%% analysis (PVLDB 5(12):2016--2017, August 2012).
@Article{Shim:2012:MAB,
  author =       "Kyuseok Shim",
  title =        "{MapReduce} algorithms for big data analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2016--2017",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is a growing trend of applications that should
                 handle big data. However, analyzing big data is a very
                 challenging problem today. For such applications, the
                 MapReduce framework has recently attracted a lot of
                 attention. Google's MapReduce or its open-source
                 equivalent Hadoop is a powerful tool for building such
                 applications. In this tutorial, we will introduce the
                 MapReduce framework based on Hadoop, discuss how to
                 design efficient MapReduce algorithms and present the
                 state-of-the-art in MapReduce algorithms for data
                 mining, machine learning and similarity joins. The
                 intended audience of this tutorial is professionals who
                 plan to design and develop MapReduce algorithms and
                 researchers who should be aware of the state-of-the-art
                 in MapReduce algorithms available today for big data
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Tutorial (2 pages): entity resolution -- theory, practice, and open
%%% challenges (PVLDB 5(12):2018--2019, August 2012).
@Article{Getoor:2012:ERT,
  author =       "Lise Getoor and Ashwin Machanavajjhala",
  title =        "Entity resolution: theory, practice \& open
                 challenges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2018--2019",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This tutorial brings together perspectives on ER from
                 a variety of fields, including databases, machine
                 learning, natural language processing and information
                 retrieval, to provide, in one setting, a survey of a
                 large body of work. We discuss both the practical
                 aspects and theoretical underpinnings of ER. We
                 describe existing solutions, current challenges, and
                 open research problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Two-page review: I/O and storage-access characteristics of NoSQL
%%% database systems (PVLDB 5(12):2020--2021, August 2012).
@Article{Schindler:2012:CND,
  author =       "Jiri Schindler",
  title =        "{I/O} characteristics of {NoSQL} databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2020--2021",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The advent of the so-called NoSQL databases has
                 brought about a new model of using storage systems.
                 While traditional relational database systems took
                 advantage of features offered by centrally-managed,
                 enterprise-class storage arrays, the new generation of
                 database systems with weaker data consistency models is
                 content with using and managing locally attached
                 individual storage devices and providing data
                 reliability and availability through high-level
                 software features and protocols. This work aims to
                 review the architecture of several existing NoSQL DBs
                 with an emphasis on how they organize and access data
                 in the shared-nothing locally-attached storage model.
                 It shows how these systems operate under typical
                 workloads (new inserts and point and range queries),
                 what access characteristics they exhibit to storage
                 systems. Finally, it examines how several recently
                 developed key/value stores, schema-free document
                 storage systems, and extensible column stores organize
                 data on local filesystems on top of directly-attached
                 disks and what system features they must (re)implement
                 in order to provide the expected data reliability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Tutorial (2 pages): heterogeneous information network analysis for
%%% mining interconnected data (PVLDB 5(12):2022--2023, August 2012).
@Article{Sun:2012:MKI,
  author =       "Yizhou Sun and Jiawei Han and Xifeng Yan and Philip S.
                 Yu",
  title =        "Mining knowledge from interconnected data: a
                 heterogeneous information network analysis approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2022--2023",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Most objects and data in the real world are
                 interconnected, forming complex, heterogeneous but
                 often semi-structured information networks. However,
                 most people consider a database merely as a data
                 repository that supports data storage and retrieval
                 rather than one or a set of heterogeneous information
                 networks that contain rich, inter-related, multi-typed
                 data and information. Most network science researchers
                 only study homogeneous networks, without distinguishing
                 the different types of objects and links in the
                 networks. In this tutorial, we view database and other
                 interconnected data as heterogeneous information
                 networks, and study how to leverage the rich semantic
                 meaning of types of objects and links in the networks.
                 We systematically introduce the technologies that can
                 effectively and efficiently mine useful knowledge from
                 such information networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Prakash:2012:UMC,
  author =       "B. Aditya Prakash and Christos Faloutsos",
  title =        "Understanding and managing cascades on large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2024--2025",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "How do contagions spread in population networks? Which
                 group should we market to, for maximizing product
                 penetration? Will a given YouTube video go viral? Who
                 are the best people to vaccinate? What happens when two
                 products compete? The objective of this tutorial is to
                  provide an intuitive and concise overview of the most
                 important theoretical results and algorithms to help us
                 understand and manipulate such propagation-style
                 processes on large networks. The tutorial contains
                 three parts: (a) Theoretical results on the behavior of
                 fundamental models; (b) Scalable Algorithms for
                 changing the behavior of these processes e.g., for
                 immunization, marketing etc.; and (c) Empirical Studies
                 of diffusion on blogs and on-line websites like
                 Twitter. The problems we focus on are central in
                 surprisingly diverse areas: from computer science and
                 engineering, epidemiology and public health, product
                 marketing to information dissemination. Our emphasis is
                 on intuition behind each topic, and guidelines for the
                 practitioner.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dogac:2012:IES,
  author =       "Asuman Dogac",
  title =        "Interoperability in {eHealth} systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2026--2027",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Interoperability in eHealth systems is important for
                 delivering quality healthcare and reducing healthcare
                 costs. Some of the important use cases include
                 coordinating the care of chronic patients by enabling
                 the co-operation of many different eHealth systems such
                 as Electronic Health Record Systems (EHRs), Personal
                 Health Record Systems (PHRs) and wireless medical
                 sensor devices; enabling secondary use of EHRs for
                 clinical research; being able to share life long EHRs
                 among different healthcare providers. Although
                 achieving eHealth interoperability is quite a challenge
                 both because there are competing standards and clinical
                 information itself is very complex, there have been a
                 number of successful industry initiatives such as
                 Integrating the Healthcare Enterprise (IHE) Profiles,
                 as well as large scale deployments such as the National
                 Health Information System of Turkey and the epSOS
                 initiative for sharing Electronic Health Records and
                 ePrescriptions in Europe. This article briefly
                 describes the subjects discussed in the VLDB 2012
                 tutorial to provide an overview of the issues in
                 eHealth interoperability describing the key
                 technologies and standards, identifying important use
                 cases and the associated research challenges and also
                 describing some of the large scale deployments. The aim
                 is to foster further interest in this area.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Agrawal:2012:SPP,
  author =       "Divyakant Agrawal and Amr {El Abbadi} and Shiyuan
                 Wang",
  title =        "Secure and privacy-preserving data services in the
                 cloud: a data centric view",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2028--2029",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Cloud computing becomes a successful paradigm for data
                 computing and storage. Increasing concerns about data
                 security and privacy in the cloud, however, have
                 emerged. Ensuring security and privacy for data
                 management and query processing in the cloud is
                 critical for better and broader uses of the cloud. This
                 tutorial covers some common cloud security and privacy
                 threats and the relevant research, while focusing on
                 the works that protect data confidentiality and query
                 access privacy for sensitive data being stored and
                 queried in the cloud. We provide a comprehensive study
                 of state-of-the-art schemes and techniques for
                 protecting data confidentiality and access privacy,
                 which make different tradeoffs in the multidimensional
                 space of security, privacy, functionality and
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guha:2012:GSS,
  author =       "Sudipto Guha and Andrew McGregor",
  title =        "Graph synopses, sketches, and streams: a survey",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2030--2031",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Massive graphs arise in any application where there is
                 data about both basic entities and the relationships
                 between these entities, e.g., web-pages and hyperlinks;
                 neurons and synapses; papers and citations; IP
                 addresses and network flows; people and their
                 friendships. Graphs have also become the de facto
                 standard for representing many types of highly
                 structured data. However, the sheer size of many of
                 these graphs renders classical algorithms inapplicable
                 when it comes to analyzing such graphs. In addition,
                 these existing algorithms are typically ill-suited to
                 processing distributed or stream data. Various
                 platforms have been developed for processing large data
                 sets. At the same time, there is the need to develop
                 new algorithmic ideas and paradigms. In the case of
                 graph processing, a lot of recent work has focused on
                  understanding the important algorithmic issues. A
                  central aspect of this is the question of how to
                 construct and leverage small-space synopses in graph
                 processing. The goal of this tutorial is to survey
                 recent work on this question and highlight interesting
                 directions for future research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Labrinidis:2012:COB,
  author =       "Alexandros Labrinidis and H. V. Jagadish",
  title =        "Challenges and opportunities with big data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2032--2033",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The promise of data-driven decision-making is now
                 being recognized broadly, and there is growing
                 enthusiasm for the notion of ``Big Data,'' including
                 the recent announcement from the White House about new
                 funding initiatives across different agencies, that
                 target research for Big Data. While the promise of Big
                 Data is real --- for example, it is estimated that
                 Google alone contributed 54 billion dollars to the US
                 economy in 2009 --- there is no clear consensus on what
                 is Big Data. In fact, there have been many
                 controversial statements about Big Data, such as ``Size
                 is the only thing that matters.'' In this panel we will
                 try to explore the controversies and debunk the myths
                 surrounding Big Data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{ElAbbadi:2012:PDS,
  author =       "Amr {El Abbadi} and Mohamed F. Mokbel",
  title =        "Panel discussion on social networks and mobility in
                 the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "5",
  number =       "12",
  pages =        "2034--2035",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 6 16:43:21 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Social networks, mobility and the cloud represent
                 special and unique opportunities for synergy among
                 several existing and emerging communities that are now
                 often evolving in isolated silos. All three areas hold
                 much promise for the future of computing, and represent
                 significant challenges for large scale data management.
                 As these three areas evolve, their direct influence on
                 significant decisions on each other becomes evident and
                 critical. This panel will bring together a set of
                 renowned researchers who will explore and discuss the
                 synergy and tensions among critical and often
                 intertwined research and application issues that arise
                 in the context of social networks and mobility in a
                 cloud infrastructure setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bouros:2012:STS,
  author =       "Panagiotis Bouros and Shen Ge and Nikos Mamoulis",
  title =        "Spatio-textual similarity joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "1",
  pages =        "1--12",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 22 12:18:56 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a collection of objects that carry both spatial
                 and textual information, a spatio-textual similarity
                 join retrieves the pairs of objects that are spatially
                 close and textually similar. As an example, consider a
                 social network with spatially and textually tagged
                 persons (i.e., their locations and profiles). A useful
                 task (for friendship recommendation) would be to find
                 pairs of persons that are spatially close and their
                 profiles have a large overlap (i.e., they have common
                 interests). Another application is data de-duplication
                 (e.g., finding photographs which are spatially close to
                  each other and have high overlap in their descriptive tags).
                 Despite the importance of this operation, there is very
                 little previous work that studies its efficient
                 evaluation and in fact under a different definition;
                 only the best match for each object is identified. In
                 this paper, we combine ideas from state-of-the-art
                 spatial distance join and set similarity join methods
                 and propose efficient algorithms that take into account
                 both spatial and textual constraints. Besides, we
                 propose a batch processing technique which boosts the
                 performance of our approaches. An experimental
                 evaluation using real and synthetic datasets shows that
                 our optimized techniques are orders of magnitude faster
                 than base-line solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Drosou:2012:DDR,
  author =       "Marina Drosou and Evaggelia Pitoura",
  title =        "{DisC} diversity: result diversification based on
                 dissimilarity and coverage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "1",
  pages =        "13--24",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 22 12:18:56 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, result diversification has attracted a lot
                 of attention as a means to improve the quality of
                 results retrieved by user queries. In this paper, we
                 propose a new, intuitive definition of diversity called
                 DisC diversity. A DisC diverse subset of a query result
                 contains objects such that each object in the result is
                 represented by a similar object in the diverse subset
                 and the objects in the diverse subset are dissimilar to
                 each other. We show that locating a minimum DisC
                 diverse subset is an NP-hard problem and provide
                 heuristics for its approximation. We also propose
                 adapting DisC diverse subsets to a different degree of
                 diversification. We call this operation zooming. We
                 present efficient implementations of our algorithms
                 based on the M-tree, a spatial index structure, and
                 experimentally evaluate their performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zeng:2012:DPF,
  author =       "Chen Zeng and Jeffrey F. Naughton and Jin-Yi Cai",
  title =        "On differentially private frequent itemset mining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "1",
  pages =        "25--36",
  month =        nov,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 22 12:18:56 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We consider differentially private frequent itemset
                 mining. We begin by exploring the theoretical
                 difficulty of simultaneously providing good utility and
                 good privacy in this task. While our analysis proves
                 that in general this is very difficult, it leaves a
                 glimmer of hope in that our proof of difficulty relies
                 on the existence of long transactions (that is,
                 transactions containing many items). Accordingly, we
                 investigate an approach that begins by truncating long
                 transactions, trading off errors introduced by the
                 truncation with those introduced by the noise added to
                 guarantee privacy. Experimental results over standard
                 benchmark databases show that truncating is indeed
                 effective. Our algorithm solves the ``classical''
                 frequent itemset mining problem, in which the goal is
                 to find all itemsets whose support exceeds a threshold.
                 Related work has proposed differentially private
                 algorithms for the top-$k$ itemset mining problem
                 (``find the $k$ most frequent itemsets''.) An
                  experimental comparison with those algorithms shows that
                 our algorithm achieves better $F$-score unless $k$ is
                 small.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2012:LMS,
  author =       "Xin Luna Dong and Barna Saha and Divesh Srivastava",
  title =        "Less is more: selecting sources wisely for
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "37--48",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We are often thrilled by the abundance of information
                 surrounding us and wish to integrate data from as many
                 sources as possible. However, understanding, analyzing,
                 and using these data are often hard. Too much data can
                 introduce a huge integration cost, such as expenses for
                 purchasing data and resources for integration and
                 cleaning. Furthermore, including low-quality data can
                 even deteriorate the quality of integration results
                 instead of bringing the desired quality gain. Thus,
                 ``the more the better'' does not always hold for data
                 integration and often ``less is more''. In this paper,
                 we study how to select a subset of sources before
                 integration such that we can balance the quality of
                 integrated data and integration cost. Inspired by the
                 Marginalism principle in economic theory, we wish to
                 integrate a new source only if its marginal gain, often
                 a function of improved integration quality, is higher
                 than the marginal cost, associated with data-purchase
                 expense and integration resources. As a first step
                 towards this goal, we focus on data fusion tasks, where
                 the goal is to resolve conflicts from different
                 sources. We propose a randomized solution for selecting
                 sources for fusion and show empirically its
                 effectiveness and scalability on both real-world data
                 and synthetic data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2012:DTA,
  author =       "Wenchao Zhou and Suyog Mapara and Yiqing Ren and Yang
                 Li and Andreas Haeberlen and Zachary Ives and Boon Thau
                 Loo and Micah Sherr",
  title =        "Distributed time-aware provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "49--60",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ability to reason about changes in a distributed
                 system's state enables network administrators to better
                 diagnose protocol misconfigurations, detect intrusions,
                 and pinpoint performance bottlenecks. We propose a
                 novel provenance model called Distributed Time-aware
                 Provenance (DTaP) that aids forensics and debugging in
                 distributed systems by explicitly representing time,
                 distributed state, and state changes. Using a
                 distributed Datalog abstraction for modeling
                 distributed protocols, we prove that the DTaP model
                 provides a sound and complete representation that
                 correctly captures dependencies among events in a
                 distributed system. We additionally introduce DistTape,
                 an implementation of the DTaP model that uses novel
                 distributed storage structures, query processing, and
                 cost-based optimization techniques to efficiently query
                 time-aware provenance in a distributed setting. Using
                 two example systems (declarative network routing and
                 Hadoop MapReduce), we demonstrate that DistTape can
                 efficiently maintain and query time-aware provenance at
                 low communication and computation cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Calvanese:2012:QPU,
  author =       "Diego Calvanese and Giuseppe {De Giacomo} and Maurizio
                 Lenzerini and Moshe Y. Vardi",
  title =        "Query processing under {GLAV} mappings for relational
                 and graph databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "61--72",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Schema mappings establish a correspondence between
                 data stored in two databases, called source and target
                 respectively. Query processing under schema mappings
                 has been investigated extensively in the two cases
                 where each target atom is mapped to a query over the
                 source (called GAV, global-as-view), and where each
                 source atom is mapped to a query over the target
                 (called LAV, local-as-view). The general case, called
                 GLAV, in which queries over the source are mapped to
                 queries over the target, has attracted a lot of
                 attention recently, especially for data exchange.
                 However, query processing for GLAV mappings has been
                 considered only for the basic service of query
                 answering, and mainly in the context of conjunctive
                 queries (CQs) in relational databases. In this paper we
                 study query processing for GLAV mappings in a wider
                 sense, considering not only query answering, but also
                 query rewriting, perfectness (the property of a
                 rewriting to compute exactly the certain answers), and
                 query containment relative to a mapping. We deal both
                 with the relational case, and with graph databases,
                 where the basic querying mechanism is that of regular
                 path queries. Query answering in GLAV can be smoothly
                 reduced to a combination of the LAV and GAV cases, and
                 for CQs this reduction can be exploited also for the
                 remaining query processing tasks. In contrast, as we
                 show, GLAV query processing for graph databases is
                 non-trivial and requires new insights and techniques.
                 We obtain upper bounds for answering, rewriting, and
                 perfectness, and show decidability of relative
                 containment.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mouratidis:2012:CIR,
  author =       "Kyriakos Mouratidis and HweeHwa Pang",
  title =        "Computing immutable regions for subspace top-$k$
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "73--84",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a high-dimensional dataset, a top-$k$ query can
                 be used to shortlist the $k$ tuples that best match the
                 user's preferences. Typically, these preferences regard
                 a subset of the available dimensions (i.e., attributes)
                 whose relative significance is expressed by
                 user-specified weights. Along with the query result, we
                 propose to compute for each involved dimension the
                 maximal deviation to the corresponding weight for which
                 the query result remains valid. The derived weight
                 ranges, called immutable regions, are useful for
                 performing sensitivity analysis, for fine-tuning the
                 query weights, etc. In this paper, we focus on top-$k$
                 queries with linear preference functions over the
                 queried dimensions. We codify the conditions under
                 which changes in a dimension's weight invalidate the
                 query result, and develop algorithms to compute the
                 immutable regions. In general, this entails the
                 examination of numerous non-result tuples. To reduce
                 processing time, we introduce a pruning technique and a
                 thresholding mechanism that allow the immutable regions
                 to be determined correctly after examining only a small
                 number of non-result tuples. We demonstrate empirically
                 that the two techniques combine well to form a robust
                 and highly resource-efficient algorithm. We verify the
                 generality of our findings using real high-dimensional
                  data from different domains (documents, images, etc.)
                 and with different characteristics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhao:2012:LSC,
  author =       "Feng Zhao and Anthony K. H. Tung",
  title =        "Large scale cohesive subgraphs discovery for social
                 network visual analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "85--96",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs are widely used in large scale social network
                 analysis nowadays. Not only analysts need to focus on
                 cohesive subgraphs to study patterns among social
                 actors, but also normal users are interested in
                  discovering what is happening in their neighborhood.
                 However, effectively storing large scale social network
                 and efficiently identifying cohesive subgraphs is
                 challenging. In this work we introduce a novel subgraph
                 concept to capture the cohesion in social interactions,
                 and propose an I/O efficient approach to discover
                 cohesive subgraphs. Besides, we propose an analytic
                 system which allows users to perform intuitive, visual
                 browsing on large scale social networks. Our system
                 stores the network as a social graph in the graph
                 database, retrieves a local cohesive subgraph based on
                 the input keywords, and then hierarchically visualizes
                 the subgraph out on orbital layout, in which more
                 important social actors are located in the center. By
                 summarizing textual interactions between social actors
                 as tag cloud, we provide a way to quickly locate active
                 social communities and their interactions in a unified
                 view.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(2), pp. 97--108, December
%%% 2012: empirical evaluation of truth-finding / data-fusion methods
%%% on Deep Web data in the Stock and Flight domains.
@Article{Li:2012:TFD,
  author =       "Xian Li and Xin Luna Dong and Kenneth Lyons and Weiyi
                 Meng and Divesh Srivastava",
  title =        "Truth finding on the {Deep Web}: is the problem
                 solved?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "97--108",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The amount of useful information available on the Web
                 has been growing at a dramatic pace in recent years and
                 people rely more and more on the Web to fulfill their
                 information needs. In this paper, we study truthfulness
                 of Deep Web data in two domains where we believed data
                 are fairly clean and data quality is important to
                 people's lives: Stock and Flight. To our surprise, we
                 observed a large amount of inconsistency on data from
                 different sources and also some sources with quite low
                 accuracy. We further applied on these two data sets
                 state-of-the-art data fusion methods that aim at
                 resolving conflicts and finding the truth, analyzed
                 their strengths and limitations, and suggested
                 promising research directions. We wish our study can
                 increase awareness of the seriousness of conflicting
                 data on the Web and in turn inspire more research in
                 our community to tackle this problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(2), pp. 109--120, December
%%% 2012: crowdsourced selectivity estimation (count estimation vs.
%%% sampled labeling) with spammer/collusion detection.
@Article{Marcus:2012:CC,
  author =       "Adam Marcus and David Karger and Samuel Madden and
                 Robert Miller and Sewoong Oh",
  title =        "Counting with the crowd",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "109--120",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we address the problem of selectivity
                 estimation in a crowdsourced database. Specifically, we
                 develop several techniques for using workers on a
                 crowdsourcing platform like Amazon's Mechanical Turk to
                 estimate the fraction of items in a dataset (e.g., a
                 collection of photos) that satisfy some property or
                 predicate (e.g., photos of trees). We do this without
                 explicitly iterating through every item in the dataset.
                 This is important in crowd-sourced query optimization
                 to support predicate ordering and in query evaluation,
                 when performing a GROUP BY operation with a COUNT or
                 AVG aggregate. We compare sampling item labels, a
                 traditional approach, to showing workers a collection
                 of items and asking them to estimate how many satisfy
                 some predicate. Additionally, we develop techniques to
                 eliminate spammers and colluding attackers trying to
                 skew selectivity estimates when using this count
                 estimation approach. We find that for images, counting
                 can be much more effective than sampled labeling,
                 reducing the amount of work necessary to arrive at an
                 estimate that is within 1\% of the true fraction by up
                 to an order of magnitude, with lower worker latency. We
                 also find that sampled labeling outperforms count
                 estimation on a text processing task, presumably
                 because people are better at quickly processing large
                 batches of images than they are at reading strings of
                 text. Our spammer detection technique, which is
                 applicable to both the label- and count-based
                 approaches, can improve accuracy by up to two orders of
                 magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(2), pp. 121--132, December
%%% 2012: ClouDiA, a deployment advisor mapping application nodes to
%%% public-cloud instances via MIP and constraint programming.
%%% Compound surnames {Le Bras} and {Vaz Salles} are brace-protected.
@Article{Zou:2012:CDA,
  author =       "Tao Zou and Ronan {Le Bras} and Marcos {Vaz Salles}
                 and Alan Demers and Johannes Gehrke",
  title =        "{ClouDiA}: a deployment advisor for public clouds",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "121--132",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "An increasing number of distributed data-driven
                 applications are moving into shared public clouds. By
                 sharing resources and operating at scale, public clouds
                 promise higher utilization and lower costs than private
                 clusters. To achieve high utilization, however, cloud
                 providers inevitably allocate virtual machine instances
                 noncontiguously, i.e., instances of a given application
                 may end up in physically distant machines in the cloud.
                 This allocation strategy can lead to large differences
                 in average latency between instances. For a large class
                 of applications, this difference can result in
                 significant performance degradation, unless care is
                 taken in how application components are mapped to
                 instances. In this paper, we propose ClouDiA, a general
                 deployment advisor that selects application node
                 deployments minimizing either (i) the largest latency
                 between application nodes, or (ii) the longest critical
                 path among all application nodes. ClouDiA employs
                 mixed-integer programming and constraint programming
                 techniques to efficiently search the space of possible
                 mappings of application nodes to instances. Through
                 experiments with synthetic and real applications in
                 Amazon EC2, we show that our techniques yield a 15\% to
                 55\% reduction in time-to-solution or service response
                 time, without any need for modifying application
                 code.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(2), pp. 133--144, December
%%% 2012: experimental comparison of five subgraph isomorphism
%%% algorithms re-implemented in a common code base.
@Article{Lee:2012:DCS,
  author =       "Jinsoo Lee and Wook-Shin Han and Romans Kasperovics
                 and Jeong-Hoon Lee",
  title =        "An in-depth comparison of subgraph isomorphism
                 algorithms in graph databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "133--144",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding subgraph isomorphisms is an important problem
                 in many applications which deal with data modeled as
                 graphs. While this problem is NP-hard, in recent years,
                 many algorithms have been proposed to solve it in a
                 reasonable time for real datasets using different join
                 orders, pruning rules, and auxiliary neighborhood
                 information. However, since they have not been
                 empirically compared one another in most research work,
                 it is not clear whether the later work outperforms the
                 earlier work. Another problem is that reported
                 comparisons were often done using the original authors'
                 binaries which were written in different programming
                 environments. In this paper, we address these serious
                 problems by re-implementing five state-of-the-art
                 subgraph isomorphism algorithms in a common code base
                 and by comparing them using many real-world datasets
                 and their query loads. Through our in-depth analysis of
                 experimental results, we report surprising empirical
                 findings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(2), pp. 145--156, December
%%% 2012: very lightweight locking (VLL) and selective contention
%%% analysis (SCA) for main-memory database concurrency control.
@Article{Ren:2012:LLM,
  author =       "Kun Ren and Alexander Thomson and Daniel J. Abadi",
  title =        "Lightweight locking for main memory database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "2",
  pages =        "145--156",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:14 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Locking is widely used as a concurrency control
                 mechanism in database systems. As more OLTP databases
                 are stored mostly or entirely in memory, transactional
                 throughput is less and less limited by disk IO, and
                 lock managers increasingly become performance
                 bottlenecks. In this paper, we introduce very
                 lightweight locking (VLL), an alternative approach to
                 pessimistic concurrency control for main-memory
                 database systems that avoids almost all overhead
                 associated with traditional lock manager operations. We
                 also propose a protocol called selective contention
                 analysis (SCA), which enables systems implementing VLL
                 to achieve high transactional throughput under high
                 contention workloads. We implement these protocols both
                 in a traditional single-machine multi-core database
                 server setting and in a distributed database where data
                 is partitioned across many commodity machines in a
                 shared-nothing cluster. Our experiments show that VLL
                 dramatically reduces locking overhead and thereby
                 increases transactional throughput in both settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(3), pp. 157--168, January
%%% 2013: lightweight privacy-preserving querying protocols for
%%% peer-to-peer data integration (PDMS), improving on the PPP
%%% protocol via oblivious transfer and collusion resistance.
@Article{Zhang:2013:LPP,
  author =       "Ye Zhang and Wai-Kit Wong and S. M. Yiu and Nikos
                 Mamoulis and David W. Cheung",
  title =        "Lightweight privacy-preserving peer-to-peer data
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "3",
  pages =        "157--168",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:18 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Peer Data Management Systems (PDMS) are an attractive
                 solution for managing distributed heterogeneous
                 information. When a peer (client) requests data from
                 another peer (server) with a different schema,
                 translations of the query and its answer are done by a
                 sequence of intermediate peers (translators). There are
                 two privacy issues in this P2P data integration
                 process: (i) answer privacy: no unauthorized parties
                 (including the translators) should learn the query
                 result; (ii) mapping privacy: the schema and the value
                 mappings used by the translators to perform the
                 translation should not be revealed to other peers.
                 Elmeleegy and Ouzzani proposed the PPP protocol that is
                 the first to support privacy-preserving querying in
                 PDMS. However, PPP suffers from several shortcomings.
                 First, PPP does not satisfy the requirement of answer
                 privacy, because it is based on commutative encryption;
                 we show that this issue can be fixed by adopting
                 another cryptographic technique called oblivious
                 transfer. Second, PPP adopts a weaker notion for
                 mapping privacy, which allows the client peer to
                 observe certain mappings done by translators. In this
                 paper, we develop a lightweight protocol, which
                 satisfies mapping privacy and extend it to a more
                 complex one that facilitates parallel translation by
                 peers. Furthermore, we consider a stronger adversary
                 model where there may be collusions among peers and
                 propose an efficient protocol that guards against
                 collusions. We conduct an experimental study on the
                 performance of the proposed protocols using both real
                 and synthetic data. The results show that the proposed
                 protocols not only achieve a better privacy guarantee
                 than PPP, but they are also more efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(3), pp. 169--180, January
%%% 2013: Minimum Substring Partitioning (MSP), a disk-based method
%%% for memory-efficient de Bruijn graph construction in genome
%%% assembly.
%%% Fix: ``substring of length k'' now sets k in math mode ($k$),
%%% consistent with $k$-mers earlier in the sentence and with ``$k$ is
%%% the length of a $k$-mer'' below.
@Article{Li:2013:MEM,
  author =       "Yang Li and Pegah Kamousi and Fangqiu Han and Shengqi
                 Yang and Xifeng Yan and Subhash Suri",
  title =        "Memory efficient minimum substring partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "3",
  pages =        "169--180",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:18 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Massively parallel DNA sequencing technologies are
                 revolutionizing genomics research. Billions of short
                 reads generated at low costs can be assembled for
                 reconstructing the whole genomes. Unfortunately, the
                 large memory footprint of the existing de novo assembly
                 algorithms makes it challenging to get the assembly
                 done for higher eukaryotes like mammals. In this work,
                 we investigate the memory issue of constructing de
                 Bruijn graph, a core task in leading assembly
                 algorithms, which often consumes several hundreds of
                 gigabytes memory for large genomes. We propose a
                 disk-based partition method, called Minimum Substring
                 Partitioning (MSP), to complete the task using less
                 than 10 gigabytes memory, without runtime slowdown. MSP
                 breaks the short reads into multiple small disjoint
                 partitions so that each partition can be loaded into
                 memory, processed individually and later merged with
                 others to form a de Bruijn graph. By leveraging the
                 overlaps among the $k$-mers (substring of length $k$),
                 MSP achieves astonishing compression ratio: The total
                 size of partitions is reduced from $ \Theta (k n) $ to
                 $ \Theta (n) $, where $n$ is the size of the short read
                 database, and $k$ is the length of a $k$-mer.
                 Experimental results show that our method can build de
                 Bruijn graphs using a commodity computer for any
                 large-volume sequence dataset.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(3), pp. 181--192, January
%%% 2013: NeMa (Network Match), neighborhood-based subgraph matching
%%% with a cost metric unifying structure and label similarity.
@Article{Khan:2013:NFG,
  author =       "Arijit Khan and Yinghui Wu and Charu C. Aggarwal and
                 Xifeng Yan",
  title =        "{NeMa}: fast graph search with label similarity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "3",
  pages =        "181--192",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:18 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "It is increasingly common to find real-life data
                 represented as networks of labeled, heterogeneous
                 entities. To query these networks, one often needs to
                 identify the matches of a given query graph in a
                 (typically large) network modeled as a target graph.
                 Due to noise and the lack of fixed schema in the target
                 graph, the query graph can substantially differ from
                 its matches in the target graph in both structure and
                 node labels, thus bringing challenges to the graph
                 querying tasks. In this paper, we propose NeMa (Network
                 Match), a neighborhood-based subgraph matching
                 technique for querying real-life networks. (1) To
                 measure the quality of the match, we propose a novel
                 subgraph matching cost metric that aggregates the costs
                 of matching individual nodes, and unifies both
                 structure and node label similarities. (2) Based on the
                 metric, we formulate the minimum cost subgraph matching
                 problem. Given a query graph and a target graph, the
                 problem is to identify the (top-$k$) matches of the
                 query graph with minimum costs in the target graph. We
                 show that the problem is NP-hard, and also hard to
                 approximate. (3) We propose a heuristic algorithm for
                 solving the problem based on an inference model. In
                 addition, we propose optimization techniques to improve
                 the efficiency of our method. (4) We empirically verify
                 that NeMa is both effective and efficient compared to
                 the keyword search and various state-of-the-art graph
                 querying techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(3), pp. 193--204, January
%%% 2013: PARAS, a parameter-space model with a PSpace index for
%%% near-real-time online association rule mining.
@Article{Lin:2013:PPS,
  author =       "Xika Lin and Abhishek Mukherji and Elke A.
                 Rundensteiner and Carolina Ruiz and Matthew O. Ward",
  title =        "{PARAS}: a parameter space framework for online
                 association mining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "3",
  pages =        "193--204",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:18 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Association rule mining is known to be computationally
                 intensive, yet real-time decision-making applications
                 are increasingly intolerant to delays. In this paper,
                 we introduce the parameter space model, called PARAS.
                 PARAS enables efficient rule mining by compactly
                 maintaining the final rulesets. The PARAS model is
                 based on the notion of stable region abstractions that
                 form the coarse granularity ruleset space. Based on new
                 insights on the redundancy relationships among rules,
                 PARAS establishes a surprisingly compact representation
                 of complex redundancy relationships while enabling
                 efficient redundancy resolution at query-time. Besides
                 the classical rule mining requests, the PARAS model
                 supports three novel classes of exploratory queries.
                 Using the proposed PSpace index, these exploratory
                 query classes can all be answered with near real-time
                 responsiveness. Our experimental evaluation using
                 several benchmark datasets demonstrates that PARAS
                 achieves 2 to 5 orders of magnitude improvement over
                 state-of-the-art approaches in online association rule
                 mining.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(3), pp. 205--216, January
%%% 2013: active learning for keyword search-based data integration,
%%% selecting top-$k$ results that are both relevant and informative
%%% for user feedback.
@Article{Yan:2013:ASF,
  author =       "Zhepeng Yan and Nan Zheng and Zachary G. Ives and
                 Partha Pratim Talukdar and Cong Yu",
  title =        "Actively soliciting feedback for query answers in
                 keyword search-based data integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "3",
  pages =        "205--216",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:18 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of scaling up data integration, such that
                 new sources can be quickly utilized as they are
                 discovered, remains elusive: global schemas for
                 integrated data are difficult to develop and expand,
                 and schema and record matching techniques are limited
                 by the fact that data and metadata are often
                 under-specified and must be disambiguated by data
                 experts. One promising approach is to avoid using a
                 global schema, and instead to develop keyword
                 search-based data integration--where the system lazily
                 discovers associations enabling it to join together
                 matches to keywords, and return ranked results. The
                 user is expected to understand the data domain and
                 provide feedback about answers' quality. The system
                 generalizes such feedback to learn how to correctly
                 integrate data. A major open challenge is that under
                 this model, the user only sees and offers feedback on a
                 few ``top-$k$'' results: this result set must be
                 carefully selected to include answers of high relevance
                 and answers that are highly informative when feedback
                 is given on them. Existing systems merely focus on
                 predicting relevance, by composing the scores of
                 various schema and record matching algorithms. In this
                 paper we show how to predict the uncertainty associated
                 with a query result's score, as well as how informative
                 feedback is on a given result. We build upon these
                 foundations to develop an active learning approach to
                 keyword search-based data integration, and we validate
                 the effectiveness of our solution over real data from
                 several very different domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(3), pp. 217--228, January
%%% 2013: survey and benchmark of 12 geo-textual indices for spatial
%%% keyword query processing.
@Article{Chen:2013:SKQ,
  author =       "Lisi Chen and Gao Cong and Christian S. Jensen and
                 Dingming Wu",
  title =        "Spatial keyword query processing: an experimental
                 evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "3",
  pages =        "217--228",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:18 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Geo-textual indices play an important role in spatial
                 keyword querying. The existing geo-textual indices have
                 not been compared systematically under the same
                 experimental framework. This makes it difficult to
                 determine which indexing technique best supports
                 specific functionality. We provide an all-around survey
                 of 12 state-of-the-art geo-textual indices. We propose
                 a benchmark that enables the comparison of the spatial
                 keyword query performance. We also report on the
                 findings obtained when applying the benchmark to the
                 indices, thus uncovering new insights that may guide
                 index selection as well as further research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(4), pp. 229--240, February
%%% 2013: algorithms for partitioning and ranking socially tagged data
%%% sources, evaluated on the full Twitter firehose.
@Article{Eftekhar:2013:PRT,
  author =       "Milad Eftekhar and Nick Koudas",
  title =        "Partitioning and ranking tagged data sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "4",
  pages =        "229--240",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:22 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Online types of expression in the form of social
                 networks, micro-blogging, blogs and rich content
                 sharing platforms have proliferated in the last few
                 years. Such proliferation contributed to the vast
                 explosion in online data sharing we are experiencing
                 today. One unique aspect of online data sharing is tags
                 manually inserted by content generators to facilitate
                 content description and discovery (e.g., hashtags in
                 tweets). In this paper we focus on these tags and we
                 study and propose algorithms that make use of tags in
                 order to automatically organize and categorize this
                 vast collection of socially contributed and tagged
                 information. In particular, we take a holistic approach
                 in organizing such tags and we propose algorithms to
                 partition as well as rank this information collection.
                 Our partitioning algorithms aim to segment the entire
                 collection of tags (and the associated content) into a
                 specified number of partitions for specific problem
                 constraints. In contrast our ranking algorithms aim to
                 identify few partitions fast, for suitably defined
                 ranking functions. We present a detailed experimental
                 study utilizing the full twitter firehose (set of all
                 tweets in the Twitter service) that attests to the
                 practical utility and effectiveness of our overall
                 approach. We also present a detailed qualitative study
                 of our results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Journal paper, Proc. VLDB Endowment 6(4), pp. 241--252, February
%%% 2013: efficient implementation of Generalized Quantifiers (GQs)
%%% in relational query languages and SQL.
@Article{Badia:2013:EIG,
  author =       "Antonio Badia and Bin Cao",
  title =        "Efficient implementation of generalized quantification
                 in relational query languages",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "4",
  pages =        "241--252",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:22 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present research aimed at improving our
                 understanding of the use and implementation of
                 quantification in relational query languages in general
                 and SQL in particular. In order to make our results as
                 general as possible, we use the framework of
                 Generalized Quantification. Generalized Quantifiers
                 (GQs) are high-level, declarative logical operators
                 that in the past have been studied from a theoretical
                 perspective. In this paper we focus on their practical
                 use, showing how to incorporate a dynamic set of GQs in
                 relational query languages, how to implement them
                 efficiently and use them in the context of SQL. We
                 present experimental evidence of the performance of the
                 approach, showing that it improves over traditional
                 (relational) approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2013:DWD,
  author =       {Rui Liu and Ashraf Aboulnaga and Kenneth Salem},
  title =        {{DAX}: a widely distributed multitenant storage
                 service for {DBMS} hosting},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {4},
  pages =        {253--264},
  month =        feb,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:22 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Many applications hosted on the cloud have
                 sophisticated data management needs that are best
                 served by a SQL-based relational DBMS. It is not
                 difficult to run a DBMS in the cloud, and in many cases
                 one DBMS instance is enough to support an application's
                 workload. However, a DBMS running in the cloud (or even
                 on a local server) still needs a way to persistently
                 store its data and protect it against failures. One way
                 to achieve this is to provide a scalable and reliable
                 storage service that the DBMS can access over a
                 network. This paper describes such a service, which we
                 call DAX. DAX relies on multi-master replication and
                 Dynamo-style flexible consistency, which enables it to
                 run in multiple data centers and hence be disaster
                 tolerant. Flexible consistency allows DAX to control
                 the consistency level of each read or write operation,
                 choosing between strong consistency at the cost of high
                 latency or weak consistency with low latency. DAX makes
                 this choice for each read or write operation by
                 applying protocols that we designed based on the
                 storage tier usage characteristics of database systems.
                 With these protocols, DAX provides a storage service
                 that can host multiple DBMS tenants, scaling with the
                 number of tenants and the required storage capacity and
                 bandwidth. DAX also provides high availability and
                 disaster tolerance for the DBMS storage tier.
                 Experiments using the TPC-C benchmark show that DAX
                 provides up to a factor of 4 performance improvement
                 over baseline solutions that do not exploit flexible
                 consistency.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Zeng:2013:DGE,
  author =       "Kai Zeng and Jiacheng Yang and Haixun Wang and Bin
                 Shao and Zhongyuan Wang",
  title =        "A distributed graph engine for web scale {RDF} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "4",
  pages =        "265--276",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:22 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Much work has been devoted to supporting RDF data. But
                 state-of-the-art systems and methods still cannot
                 handle web scale RDF data effectively. Furthermore,
                 many useful and general purpose graph-based operations
                 (e.g., random walk, reachability, community discovery)
                 on RDF data are not supported, as most existing systems
                 store and index data in particular ways (e.g., as
                 relational tables or as a bitmap matrix) to maximize
                 one particular operation on RDF data: SPARQL query
                 processing. In this paper, we introduce Trinity.RDF, a
                 distributed, memory-based graph engine for web scale
                 RDF data. Instead of managing the RDF data in triple
                 stores or as bitmap matrices, we store RDF data in its
                 native graph form. It achieves much better (sometimes
                 orders of magnitude better) performance for SPARQL
                 queries than the state-of-the-art approaches.
                 Furthermore, since the data is stored in its native
                 graph form, the system can support other operations
                 (e.g., random walks, reachability) on RDF graphs as
                 well. We conduct comprehensive experimental studies on
                 real life, web scale RDF data to demonstrate the
                 effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sarma:2013:ULB,
  author =       "Anish {Das Sarma} and Foto N. Afrati and Semih Salihoglu
                 and Jeffrey D. Ullman",
  title =        "Upper and lower bounds on the cost of a map-reduce
                 computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "4",
  pages =        "277--288",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:22 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper we study the tradeoff between
                 parallelism and communication cost in a map-reduce
                 computation. For any problem that is not
                 ``embarrassingly parallel,'' the finer we partition the
                 work of the reducers so that more parallelism can be
                 extracted, the greater will be the total communication
                 between mappers and reducers. We introduce a model of
                 problems that can be solved in a single round of
                 map-reduce computation. This model enables a generic
                 recipe for discovering lower bounds on communication
                 cost as a function of the maximum number of inputs that
                 can be assigned to one reducer. We use the model to
                 analyze the tradeoff for three problems: finding pairs
                 of strings at Hamming distance $d$, finding triangles and
                 other patterns in a larger graph, and matrix
                 multiplication. For finding strings of Hamming distance
                 1, we have upper and lower bounds that match exactly.
                 For triangles and many other graphs, we have upper and
                 lower bounds that are the same to within a constant
                 factor. For the problem of matrix multiplication, we
                 have matching upper and lower bounds for one-round
                 map-reduce algorithms. We are also able to explore
                 two-round map-reduce algorithms for matrix
                 multiplication and show that these never have more
                 communication, for a given reducer size, than the best
                 one-round algorithm, and often have significantly
                 less.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tu:2013:PAQ,
  author =       {Stephen Tu and M. Frans Kaashoek and Samuel Madden and
                 Nickolai Zeldovich},
  title =        {Processing analytical queries over encrypted data},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {5},
  pages =        {289--300},
  month =        mar,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:27 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {MONOMI is a system for securely executing analytical
                 workloads over sensitive data on an untrusted database
                 server. MONOMI works by encrypting the entire database
                 and running queries over the encrypted data. MONOMI
                 introduces split client/server query execution, which
                 can execute arbitrarily complex queries over encrypted
                 data, as well as several techniques that improve
                 performance for such workloads, including per-row
                 precomputation, space-efficient encryption, grouped
                 homomorphic addition, and pre-filtering. Since these
                 optimizations are good for some queries but not others,
                 MONOMI introduces a designer for choosing an efficient
                 physical design at the server for a given workload, and
                 a planner to choose an efficient execution plan for a
                 given query at runtime. A prototype of MONOMI running
                 on top of Postgres can execute most of the queries from
                 the TPC-H benchmark with a median overhead of only $
                 1.24 \times $ (ranging from $ 1.03 \times $ to $ 2.33
                 \times $) compared to an un-encrypted Postgres database
                 where a compromised server would reveal all data.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Kellaris:2013:PDP,
  author =       {Georgios Kellaris and Stavros Papadopoulos},
  title =        {Practical differential privacy via grouping and
                 smoothing},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {5},
  pages =        {301--312},
  month =        mar,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:27 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We address one-time publishing of non-overlapping
                 counts with $ \epsilon $-differential privacy. These
                 statistics are useful in a wide and important range of
                 applications, including transactional, traffic and
                 medical data analysis. Prior work on the topic
                 publishes such statistics with prohibitively low
                 utility in several practical scenarios. Towards this
                 end, we present GS, a method that pre-processes the
                 counts by elaborately grouping and smoothing them via
                 averaging. This step acts as a form of preliminary
                 perturbation that diminishes sensitivity, and enables
                 GS to achieve $ \epsilon $-differential privacy through
                 low Laplace noise injection. The grouping strategy is
                 dictated by a sampling mechanism, which minimizes the
                 smoothing perturbation. We demonstrate the superiority
                 of GS over its competitors, and confirm its
                 practicality, via extensive experiments on real
                 datasets.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Kaushik:2013:SSD,
  author =       "Raghav Kaushik and Yupeng Fu and Ravishankar
                 Ramamurthy",
  title =        "On scaling up sensitive data auditing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "5",
  pages =        "313--324",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper studies the following problem: given (1) a
                 query and (2) a set of sensitive records, find the
                 subset of records ``accessed'' by the query. The notion
                 of a query accessing a single record is adopted from
                 prior work. There are several scenarios where the
                 number of sensitive records is large (in the millions).
                 The novel challenge addressed in this work is to
                 develop a general-purpose solution for complex SQL that
                 scales in the number of sensitive records. We propose
                 efficient techniques that improve upon straightforward
                 alternatives by orders of magnitude. Our empirical
                 evaluation over the TPC-H benchmark data illustrates
                 the benefits of our techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sathiamoorthy:2013:XEN,
  author =       {Maheswaran Sathiamoorthy and Megasthenis Asteris and
                 Dimitris Papailiopoulos and Alexandros G. Dimakis and
                 Ramkumar Vadali and Scott Chen and Dhruba Borthakur},
  title =        {{XORing} elephants: novel erasure codes for big data},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {5},
  pages =        {325--336},
  month =        mar,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:27 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Distributed storage systems for large clusters
                 typically use replication to provide reliability.
                 Recently, erasure codes have been used to reduce the
                 large storage overhead of three-replicated systems.
                 Reed--Solomon codes are the standard design choice and
                 their high repair cost is often considered an
                 unavoidable price to pay for high storage efficiency
                 and high reliability. This paper shows how to overcome
                 this limitation. We present a novel family of erasure
                 codes that are efficiently repairable and offer higher
                 reliability compared to Reed--Solomon codes. We show
                 analytically that our codes are optimal on a recently
                 identified tradeoff between locality and minimum
                 distance. We implement our new codes in Hadoop HDFS and
                 compare to a currently deployed HDFS module that uses
                 Reed--Solomon codes. Our modified HDFS implementation
                 shows a reduction of approximately $ 2 \times $ on the
                 repair disk I/O and repair network traffic. The
                 disadvantage of the new coding scheme is that it
                 requires 14\% more storage compared to Reed--Solomon
                 codes, an overhead shown to be information
                 theoretically optimal to obtain locality. Because the
                 new codes repair failures faster, this provides higher
                 reliability, which is orders of magnitude higher
                 compared to replication.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Rendle:2013:SFM,
  author =       {Steffen Rendle},
  title =        {Scaling factorization machines to relational data},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {5},
  pages =        {337--348},
  month =        mar,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:27 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {The most common approach in predictive modeling is to
                 describe cases with feature vectors (aka design
                 matrix). Many machine learning methods such as linear
                 regression or support vector machines rely on this
                 representation. However, when the underlying data has
                 strong relational patterns, especially relations with
                 high cardinality, the design matrix can get very large
                 which can make learning and prediction slow or even
                 infeasible. This work solves this issue by making use
                 of repeating patterns in the design matrix which stem
                 from the underlying relational structure of the data.
                 It is shown how coordinate descent learning and
                 Bayesian Markov Chain Monte Carlo inference can be
                 scaled for linear regression and factorization machine
                 models. Empirically, it is shown on two large scale and
                 very competitive datasets (Netflix prize, KDDCup 2012),
                 that (1) standard learning algorithms based on the
                 design matrix representation cannot scale to relational
                 predictor variables, (2) the proposed new algorithms
                 scale and (3) the predictive quality of the proposed
                 generic feature-based approach is as good as the best
                 specialized models that have been tailored to the
                 respective tasks.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Whang:2013:QSC,
  author =       {Steven Euijong Whang and Peter Lofgren and Hector
                 Garcia-Molina},
  title =        {Question selection for crowd entity resolution},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {6},
  pages =        {349--360},
  month =        apr,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:32 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We study the problem of enhancing Entity Resolution
                 (ER) with the help of crowdsourcing. ER is the problem
                 of clustering records that refer to the same real-world
                 entity and can be an extremely difficult process for
                 computer algorithms alone. For example, figuring out
                 which images refer to the same person can be a hard
                 task for computers, but an easy one for humans. We
                 study the problem of resolving records with
                 crowdsourcing where we ask questions to humans in order
                 to guide ER into producing accurate results. Since
                 human work is costly, our goal is to ask as few
                 questions as possible. We propose a probabilistic
                 framework for ER that can be used to estimate how much
                 ER accuracy we obtain by asking each question and
                 select the best question with the highest expected
                 accuracy. Computing the expected accuracy is \#P-hard,
                 so we propose approximation techniques for efficient
                 computation. We evaluate our best question algorithms
                 on real and synthetic datasets and demonstrate how we
                 can obtain high ER accuracy while significantly
                 reducing the number of questions asked to humans.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Jindal:2013:CKB,
  author =       {Alekh Jindal and Endre Palatinus and Vladimir Pavlov
                 and Jens Dittrich},
  title =        {A comparison of knives for bread slicing},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {6},
  pages =        {361--372},
  month =        apr,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:32 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Vertical partitioning is a crucial step in physical
                 database design in row-oriented databases. A number of
                 vertical partitioning algorithms have been proposed
                 over the last three decades for a variety of niche
                 scenarios. In principle, the underlying problem remains
                 the same: decompose a table into one or more vertical
                 partitions. However, it is not clear how good different
                 vertical partitioning algorithms are in comparison to
                 each other. In fact, it is not even clear how to
                 experimentally compare different vertical partitioning
                 algorithms. In this paper, we present an exhaustive
                 experimental study of several vertical partitioning
                 algorithms. We categorize vertical partitioning
                 algorithms along three dimensions. We survey six
                 vertical partitioning algorithms and discuss their pros
                 and cons. We identify the major differences in the
                 use-case settings for different algorithms and describe
                 how to make an apples-to-apples comparison of different
                 vertical partitioning algorithms under the same
                 setting. We propose four metrics to compare vertical
                 partitioning algorithms. We show experimental results
                 from the TPC-H and SSB benchmark and present four key
                 lessons learned: (1) we can do four orders of magnitude
                 less computation and still find the optimal layouts,
                 (2) the benefits of vertical partitioning depend
                 strongly on the database buffer size, (3) HillClimb is
                 the best vertical partitioning algorithm, and (4)
                 vertical partitioning for TPC-H-like benchmarks can
                 improve over column layout by only up to 5\%.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Xiao:2013:EET,
  author =       {Chuan Xiao and Jianbin Qin and Wei Wang and Yoshiharu
                 Ishikawa and Koji Tsuda and Kunihiko Sadakane},
  title =        {Efficient error-tolerant query autocompletion},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {6},
  number =       {6},
  pages =        {373--384},
  month =        apr,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Fri Dec 13 05:56:32 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Query autocompletion is an important feature saving
                 users many keystrokes from typing the entire query. In
                 this paper we study the problem of query autocompletion
                 that tolerates errors in users' input using edit
                 distance constraints. Previous approaches index data
                 strings in a trie, and continuously maintain all the
                 prefixes of data strings whose edit distance from the
                 query are within the threshold. The major inherent
                 problem is that the number of such prefixes is huge for
                 the first few characters of the query and is
                 exponential in the alphabet size. This results in slow
                 query response even if the entire query approximately
                 matches only few prefixes. In this paper, we propose a
                 novel neighborhood generation-based algorithm,
                 IncNGTrie, which can achieve up to two orders of
                 magnitude speedup over existing methods for the
                 error-tolerant query autocompletion problem. Our
                 proposed algorithm only maintains a small set of active
                 nodes, thus saving both space and time to process the
                 query. We also study efficient duplicate removal which
                 is a core problem in fetching query answers. In
                 addition, we propose optimization techniques to reduce
                 our index size, as well as discussions on several
                 extensions to our method. The efficiency of our method
                 is demonstrated against existing methods through
                 extensive experiments on real datasets.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Shraer:2013:TKP,
  author =       "Alexander Shraer and Maxim Gurevich and Marcus
                 Fontoura and Vanja Josifovski",
  title =        "Top-$k$ publish-subscribe for social annotation of
                 news",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "385--396",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Social content, such as Twitter updates, often have
                 the quickest first-hand reports of news events, as well
                 as numerous commentaries that are indicative of public
                 view of such events. As such, social updates provide a
                 good complement to professionally written news
                 articles. In this paper we consider the problem of
                 automatically annotating news stories with social
                 updates (tweets), at a news website serving high volume
                 of pageviews. The high rate of both the pageviews
                 (millions to billions a day) and of the incoming tweets
                 (more than 100 millions a day) make real-time indexing
                 of tweets ineffective, as this requires an index that
                 is both queried and updated extremely frequently. The
                 rate of tweet updates makes caching techniques almost
                 unusable since the cache would become stale very
                 quickly. We propose a novel architecture where each
                 story is treated as a subscription for tweets relevant
                 to the story's content, and new algorithms that
                 efficiently match tweets to stories, proactively
                 maintaining the top-$k$ tweets for each story. Such
                 top-$k$ pub-sub consumes only a small fraction of the
                 resource cost of alternative solutions, and can be
                 applicable to other large scale content-based
                 publish-subscribe problems. We demonstrate the
                 effectiveness of our approach on real-world data: a
                 corpus of news stories from Yahoo! News and a log of
                 Twitter updates.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kolaitis:2013:EQI,
  author =       "Phokion G. Kolaitis and Enela Pema and Wang-Chiew
                 Tan",
  title =        "Efficient querying of inconsistent databases with
                 binary integer programming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "397--408",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "An inconsistent database is a database that violates
                 one or more integrity constraints. A typical approach
                 for answering a query over an inconsistent database is
                 to first clean the inconsistent database by
                 transforming it to a consistent one and then apply the
                 query to the consistent database. An alternative and
                 more principled approach, known as consistent query
                 answering, derives the answers to a query over an
                 inconsistent database without changing the database,
                 but by taking into account all possible repairs of the
                 database. In this paper, we study the problem of
                 consistent query answering over inconsistent databases
                 for the class of conjunctive queries under primary key
                 constraints. We develop a system, called EQUIP, that
                 represents a fundamental departure from existing
                 approaches for computing the consistent answers to
                 queries in this class. At the heart of EQUIP is a
                 technique, based on Binary Integer Programming (BIP),
                 that repeatedly searches for repairs to eliminate
                 candidate consistent answers until no further such
                 candidates can be eliminated. We establish rigorously
                 the correctness of the algorithms behind EQUIP and
                 carry out an extensive experimental investigation that
                 validates the effectiveness of our approach.
                 Specifically, EQUIP exhibits good and stable
                 performance on conjunctive queries under primary key
                 constraints, it significantly outperforms existing
                 systems for computing the consistent answers of such
                 queries in the case in which the consistent answers are
                 not first-order rewritable, and it scales well.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Gionis et al., Proc. VLDB Endowment 6(6):409--420 (April 2013):
%%% social piggybacking (serving two friends' requests via a common
%%% hub view) to raise social-network feed throughput.
%%% Edit: abstract grammar fix, "A basic functionalities ... is" ->
%%% "A basic functionality ... is".
@Article{Gionis:2013:PSN,
  author =       "Aristides Gionis and Flavio Junqueira and Vincent
                 Leroy and Marco Serafini and Ingmar Weber",
  title =        "Piggybacking on social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "409--420",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The popularity of social-networking sites has
                 increased rapidly over the last decade. A basic
                 functionality of social-networking sites is to
                 present users with streams of events shared by their
                 friends. At a systems level, materialized per-user
                 views are a common way to assemble and deliver such
                 event streams on-line and with low latency. Access to
                 the data stores, which keep the user views, is a major
                 bottleneck of social-networking systems. We propose to
                 improve the throughput of these systems by using social
                 piggybacking, which consists of processing the requests
                 of two friends by querying and updating the view of a
                 third common friend. By using one such hub view, the
                 system can serve requests of the first friend without
                 querying or updating the view of the second. We show
                 that, given a social graph, social piggybacking can
                 minimize the overall number of requests, but computing
                 the optimal set of hubs is an NP-hard problem. We
                 propose an $ O(\log n) $ approximation algorithm and a
                 heuristic to solve the problem, and evaluate them using
                 the full Twitter and Flickr social graphs, which have
                 up to billions of edges. Compared to existing
                 approaches, using social piggybacking results in
                 similar throughput in systems with few servers, but
                 enables substantial throughput improvements as the size
                 of the system grows, reaching up to a 2-factor
                 increase. We also evaluate our algorithms on a real
                 social networking system prototype and we show that the
                 actual increase in throughput corresponds nicely to the
                 gain anticipated by our cost function.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Adelfio and Samet, Proc. VLDB Endowment 6(6):421--432 (April
%%% 2013): CRF-based schema extraction for tabular data on the Web.
%%% Entry checked: fields complete, "Web" brace-protected in title.
@Article{Adelfio:2013:SET,
  author =       "Marco D. Adelfio and Hanan Samet",
  title =        "Schema extraction for tabular data on the {Web}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "421--432",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tabular data is an abundant source of information on
                 the Web, but remains mostly isolated from the latter's
                 interconnections since tables lack links and
                 computer-accessible descriptions of their structure. In
                 other words, the schemas of these tables --- attribute
                 names, values, data types, etc. --- are not explicitly
                 stored as table metadata. Consequently, the structure
                 that these tables contain is not accessible to the
                 crawlers that power search engines and thus not
                 accessible to user search queries. We address this lack
                 of structure with a new method for leveraging the
                 principles of table construction in order to extract
                 table schemas. Discovering the schema by which a table
                 is constructed is achieved by harnessing the
                 similarities and differences of nearby table rows
                 through the use of a novel set of features and a
                 feature processing scheme. The schemas of these data
                 tables are determined using a classification technique
                 based on conditional random fields in combination with
                 a novel feature encoding method called logarithmic
                 binning, which is specifically designed for the data
                 table extraction task. Our method provides considerable
                 improvement over the well-known WebTables schema
                 extraction method. In contrast with previous work that
                 focuses on extracting individual relations, our method
                 excels at correctly interpreting full tables, thereby
                 being capable of handling general tables such as those
                 found in spreadsheets, instead of being restricted to
                 HTML tables as is the case with the WebTables method.
                 We also extract additional schema characteristics, such
                 as row groupings, which are important for supporting
                 information retrieval tasks on tabular data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Sar{\'\i}y{\"u}ce et al., Proc. VLDB Endowment 6(6):433--444
%%% (April 2013): first incremental $k$-core decomposition algorithms
%%% for streaming graph data. Entry checked: author accents use
%%% brace-wrapped BibTeX special characters (correct for sorting);
%%% math in title kept in $...$.
@Article{Sariyuce:2013:SAK,
  author =       "Ahmet Erdem Sar{\'\i}y{\"u}ce and Bugra Gedik and
                 Gabriela Jacques-Silva and Kun-Lung Wu and {\"U}mit V.
                 {\c{C}}ataly{\"u}rek",
  title =        "Streaming algorithms for $k$-core decomposition",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "433--444",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A $k$-core of a graph is a maximal connected subgraph
                 in which every vertex is connected to at least $k$
                 vertices in the subgraph. $k$-core decomposition is
                 often used in large-scale network analysis, such as
                 community detection, protein function prediction,
                 visualization, and solving NP-Hard problems on real
                 networks efficiently, like maximal clique finding. In
                 many real-world applications, networks change over
                 time. As a result, it is essential to develop efficient
                 incremental algorithms for streaming graph data. In
                 this paper, we propose the first incremental $k$-core
                 decomposition algorithms for streaming graph data.
                 These algorithms locate a small subgraph that is
                 guaranteed to contain the list of vertices whose
                 maximum $k$-core values have to be updated, and
                 efficiently process this subgraph to update the
                 $k$-core decomposition. Our results show a significant
                 reduction in run-time compared to non-incremental
                 alternatives. We show the efficiency of our algorithms
                 on different types of real and synthetic graphs, at
                 different scales. For a graph of 16 million vertices,
                 we observe speedups reaching a million times, relative
                 to the non-incremental algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Hassanzadeh et al., Proc. VLDB Endowment 6(6):445--456 (April
%%% 2013): instance-based discovery of linkage points across Web data
%%% sources. Entry checked: accented surnames entered as BibTeX
%%% special characters; "Web" brace-protected in title.
@Article{Hassanzadeh:2013:DLP,
  author =       "Oktie Hassanzadeh and Ken Q. Pu and Soheil Hassas
                 Yeganeh and Ren{\'e}e J. Miller and Lucian Popa and
                 Mauricio A. Hern{\'a}ndez and Howard Ho",
  title =        "Discovering linkage points over {Web} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "445--456",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A basic step in integration is the identification of
                 linkage points, i.e., finding attributes that are
                 shared (or related) between data sources, and that can
                 be used to match records or entities across sources.
                 This is usually performed using a match operator, that
                 associates attributes of one database to another.
                 However, the massive growth in the amount and variety
                 of unstructured and semi-structured data on the Web has
                 created new challenges for this task. Such data sources
                 often do not have a fixed pre-defined schema and
                 contain large numbers of diverse attributes.
                 Furthermore, the end goal is not schema alignment as
                 these schemas may be too heterogeneous (and dynamic) to
                 meaningfully align. Rather, the goal is to align any
                 overlapping data shared by these sources. We will show
                 that even attributes with different meanings (that
                 would not qualify as schema matches) can sometimes be
                 useful in aligning data. The solution we propose in
                 this paper replaces the basic schema-matching step with
                 a more complex instance-based schema analysis and
                 linkage discovery. We present a framework consisting of
                 a library of efficient lexical analyzers and similarity
                 functions, and a set of search algorithms for effective
                 and efficient identification of linkage points over Web
                 data. We experimentally evaluate the effectiveness of
                 our proposed algorithms in real-world integration
                 scenarios in several domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Fu et al., Proc. VLDB Endowment 6(6):457--468 (April 2013):
%%% IS-Label, an independent-set based labeling index for
%%% point-to-point distance queries on large graphs. Entry checked:
%%% system name "{IS-Label}" brace-protected against down-casing.
@Article{Fu:2013:LIS,
  author =       "Ada Wai-Chee Fu and Huanhuan Wu and James Cheng and
                 Raymond Chi-Wing Wong",
  title =        "{IS-Label}: an independent-set based labeling scheme
                 for point-to-point distance querying",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "457--468",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of computing shortest path or
                 distance between two query vertices in a graph, which
                 has numerous important applications. Quite a number of
                 indexes have been proposed to answer such distance
                 queries. However, all of these indexes can only process
                 graphs of size barely up to 1 million vertices, which
                 is rather small in view of many of the fast-growing
                 real-world graphs today such as social networks and Web
                 graphs. We propose an efficient index, which is a novel
                 labeling scheme based on the independent set of a
                 graph. We show that our method can handle graphs of
                 size orders of magnitude larger than existing
                 indexes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Tran et al., Proc. VLDB Endowment 6(6):469--480 (April 2013):
%%% Gaussian-process framework for evaluating user-defined functions
%%% on uncertain data with error bounds. Entry checked: complete.
@Article{Tran:2013:SUD,
  author =       "Thanh T. L. Tran and Yanlei Diao and Charles Sutton
                 and Anna Liu",
  title =        "Supporting user-defined functions on uncertain data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "469--480",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Uncertain data management has become crucial in many
                 sensing and scientific applications. As user-defined
                 functions (UDFs) become widely used in these
                 applications, an important task is to capture result
                 uncertainty for queries that evaluate UDFs on uncertain
                 data. In this work, we provide a general framework for
                 supporting UDFs on uncertain data. Specifically, we
                 propose a learning approach based on Gaussian processes
                 (GPs) to compute approximate output distributions of a
                 UDF when evaluated on uncertain input, with guaranteed
                 error bounds. We also devise an online algorithm to
                 compute such output distributions, which employs a
                 suite of optimizations to improve accuracy and
                 performance. Our evaluation using both real-world and
                 synthetic functions shows that our proposed GP approach
                 can outperform the state-of-the-art sampling approach
                 with up to two orders of magnitude improvement for a
                 variety of UDFs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Zhu et al., Proc. VLDB Endowment 6(6):481--492 (April 2013):
%%% FastPPV, incremental accuracy-aware Personalized PageRank via
%%% scheduled approximation.
%%% Edits: abstract typos fixed --- "offline phrases" -> "offline
%%% phases" and "but also scale well" -> "but also scales well".
@Article{Zhu:2013:IAA,
  author =       "Fanwei Zhu and Yuan Fang and Kevin Chen-Chuan Chang
                 and Jing Ying",
  title =        "Incremental and accuracy-aware {Personalized PageRank}
                 through scheduled approximation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "6",
  pages =        "481--492",
  month =        apr,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:32 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As Personalized PageRank has been widely leveraged for
                 ranking on a graph, the efficient computation of
                 Personalized PageRank Vector (PPV) becomes a prominent
                 issue. In this paper, we propose FastPPV, an
                 approximate PPV computation algorithm that is
                 incremental and accuracy-aware. Our approach hinges on
                 a novel paradigm of scheduled approximation: the
                 computation is partitioned and scheduled for processing
                 in an ``organized'' way, such that we can gradually
                 improve our PPV estimation in an incremental manner,
                 and quantify the accuracy of our approximation at query
                 time. Guided by this principle, we develop an efficient
                 hub based realization, where we adopt the metric of
                 hub-length to partition and schedule random walk tours
                 so that the approximation error reduces exponentially
                 over iterations. Furthermore, as tours are segmented by
                 hubs, the shared substructures between different tours
                 (around the same hub) can be reused to speed up query
                 processing both within and across iterations. Finally,
                 we evaluate FastPPV over two real-world graphs, and
                 show that it not only significantly outperforms two
                 state-of-the-art baselines in both online and offline
                 phases, but also scales well on larger graphs. In
                 particular, we are able to achieve near-constant time
                 online query processing irrespective of graph size.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Zheng et al., Proc. VLDB Endowment 6(7):493--504 (May 2013):
%%% SimRank-based similarity joins over large graphs with an $h$-go
%%% cover index.
%%% Edit: title had the algorithm name down-cased and unprotected
%%% ("simrank-based"); brace-protected as "{SimRank}-based" to match
%%% the capitalization used in the abstract and the original paper.
@Article{Zheng:2013:ESB,
  author =       "Weiguo Zheng and Lei Zou and Yansong Feng and Lei Chen
                 and Dongyan Zhao",
  title =        "Efficient {SimRank}-based similarity join over large
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "7",
  pages =        "493--504",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:37 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs have been widely used to model complex data in
                 many real-world applications. Answering vertex join
                 queries over large graphs is meaningful and
                 interesting, which can benefit friend recommendation in
                 social networks and link prediction, etc. In this
                 paper, we adopt ``SimRank'' to evaluate the similarity
                 of two vertices in a large graph because of its
                 generality. Note that ``SimRank'' is purely structure
                 dependent and it does not rely on the domain knowledge.
                 Specifically, we define a SimRank-based join (SRJ)
                 query to find all the vertex pairs satisfying the
                 threshold in a data graph $G$. In order to reduce the
                 search space, we propose an estimated shortest-path
                 distance based upper bound for SimRank scores to prune
                 unpromising vertex pairs. In the verification, we
                 propose a novel index, called $h$-go cover, to
                 efficiently compute the SimRank score of a single
                 vertex pair. Given a graph $G$, we only materialize the
                 SimRank scores of a small proportion of vertex pairs
                 (called $h$-go covers), based on which, the SimRank
                 score of any vertex pair can be computed easily. In
                 order to handle large graphs, we extend our technique
                 to the partition-based framework. Thorough theoretical
                 analysis and extensive experiments over both real and
                 synthetic datasets confirm the efficiency and
                 effectiveness of our solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Liu, Suchitra and Wong, Proc. VLDB Endowment 6(7):505--516 (May
%%% 2013): performance study of inverted files, signature files and
%%% CFP-tree for indexing/querying frequent itemsets.
%%% Edit: abstract grammar fix, "attention is paid on managing" ->
%%% "attention is paid to managing".
@Article{Liu:2013:PST,
  author =       "Guimei Liu and Andre Suchitra and Limsoon Wong",
  title =        "A performance study of three disk-based structures for
                 indexing and querying frequent itemsets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "7",
  pages =        "505--516",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:37 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Frequent itemset mining is an important problem in the
                 data mining area. Extensive efforts have been devoted
                 to developing efficient algorithms for mining frequent
                 itemsets. However, not much attention is paid to
                 managing the large collection of frequent itemsets
                 produced by these algorithms for subsequent analysis
                 and for user exploration. In this paper, we study three
                 structures for indexing and querying frequent itemsets:
                 inverted files, signature files and CFP-tree. The first
                 two structures have been widely used for indexing
                 general set-valued data. We make some modifications to
                 make them more suitable for indexing frequent itemsets.
                 The CFP-tree structure is specially designed for
                 storing frequent itemsets. We add a pruning technique
                 based on length-2 frequent itemsets to make it more
                 efficient for processing superset queries. We study the
                 performance of the three structures in supporting five
                 types of containment queries: exact match,
                 subset/superset search and immediate subset/superset
                 search. Our results show that no structure can
                 outperform other structures for all the five types of
                 queries on all the datasets. CFP-tree shows better
                 overall performance than the other two structures.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Yuan et al., Proc. VLDB Endowment 6(7):517--528 (May 2013):
%%% TripleBit, a fast and compact storage/access system for large
%%% scale RDF data.
%%% Edits: abstract typos fixed --- "A grant challenge" -> "A grand
%%% challenge" and "orders of mangnitude" -> "orders of magnitude".
@Article{Yuan:2013:TFC,
  author =       "Pingpeng Yuan and Pu Liu and Buwen Wu and Hai Jin and
                 Wenya Zhang and Ling Liu",
  title =        "{TripleBit}: a fast and compact system for large scale
                 {RDF} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "7",
  pages =        "517--528",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:37 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The volume of RDF data continues to grow over the past
                 decade and many known RDF datasets have billions of
                 triples. A grand challenge of managing this huge RDF
                 data is how to access this big RDF data efficiently. A
                 popular approach to addressing the problem is to build
                 a full set of permutations of $ (S, P, O) $ indexes.
                 Although this approach has shown to accelerate joins by
                 orders of magnitude, the large space overhead limits
                 the scalability of this approach and makes it
                 heavyweight. In this paper, we present TripleBit, a
                 fast and compact system for storing and accessing RDF
                 data. The design of TripleBit has three salient
                 features. First, the compact design of TripleBit
                 reduces both the size of stored RDF data and the size
                 of its indexes. Second, TripleBit introduces two
                 auxiliary index structures, ID-Chunk bit matrix and
                 ID-Predicate bit matrix, to minimize the cost of index
                 selection during query evaluation. Third, its query
                 processor dynamically generates an optimal execution
                 ordering for join queries, leading to fast query
                 execution and effective reduction on the size of
                 intermediate results. Our experiments show that
                 TripleBit outperforms RDF-3X, MonetDB, BitMat on LUBM,
                 UniProt and BTC 2012 benchmark queries and it offers
                 orders of magnitude performance improvement for some
                 complex join queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Bajaj and Sion, Proc. VLDB Endowment 6(7):529--540 (May 2013):
%%% CorrectDB, a SQL engine with query authentication via server-side
%%% trusted hardware. Entry checked: "{CorrectDB}" and "{SQL}"
%%% brace-protected in title; fields complete.
@Article{Bajaj:2013:CSE,
  author =       "Sumeet Bajaj and Radu Sion",
  title =        "{CorrectDB}: {SQL} engine with practical query
                 authentication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "7",
  pages =        "529--540",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:37 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Clients of outsourced databases need Query
                 Authentication (QA) guaranteeing the integrity
                 (correctness and completeness), and authenticity of the
                 query results returned by potentially compromised
                 providers. Existing results provide QA assurances for a
                 limited class of queries by deploying several software
                 cryptographic constructs. Here, we show that, to
                 achieve QA, however, it is significantly cheaper and
                 more practical to deploy server-hosted, tamper-proof
                 co-processors, despite their higher acquisition costs.
                 Further, this provides the ability to handle arbitrary
                 queries. To reach this insight, we extensively survey
                 existing QA work and identify interdependencies and
                 efficiency relationships. We then introduce CorrectDB,
                 a new DBMS with full QA assurances, leveraging
                 server-hosted, tamper-proof, trusted hardware in close
                 proximity to the outsourced data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Liu and Salem, Proc. VLDB Endowment 6(8):541--552 (June 2013):
%%% cost-aware buffer-pool and SSD caching policies for hybrid
%%% SSD/HDD database storage. Entry checked: complete; note volume 6
%%% number 8 begins here (month = jun).
@Article{Liu:2013:HSM,
  author =       "Xin Liu and Kenneth Salem",
  title =        "Hybrid storage management for database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "8",
  pages =        "541--552",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:42 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The use of flash-based solid state drives (SSDs) in
                 storage systems is growing. Adding SSDs to a storage
                 system not only raises the question of how to manage
                 the SSDs, but also raises the question of whether
                 current buffer pool algorithms will still work
                 effectively. We are interested in the use of hybrid
                 storage systems, consisting of SSDs and hard disk
                 drives (HDDs), for database management. We present
                 cost-aware replacement algorithms, which are aware of
                 the difference in performance between SSDs and HDDs,
                 for both the DBMS buffer pool and the SSDs. In hybrid
                 storage systems, the physical access pattern to the
                 SSDs depends on the management of the DBMS buffer pool.
                 We studied the impact of buffer pool caching policies
                 on SSD access patterns. Based on these studies, we
                 designed a cost-adjusted caching policy to effectively
                 manage the SSD. We implemented these algorithms in
                 MySQL's InnoDB storage engine and used the TPC-C
                 workload to demonstrate that these cost-aware
                 algorithms outperform previous algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Wu and Madden, Proc. VLDB Endowment 6(8):553--564 (June 2013):
%%% Scorpion, explaining user-selected outliers in aggregate query
%%% results via influence-based predicate search. Entry checked:
%%% system name "{Scorpion}" brace-protected in title.
@Article{Wu:2013:SEO,
  author =       "Eugene Wu and Samuel Madden",
  title =        "{Scorpion}: explaining away outliers in aggregate
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "8",
  pages =        "553--564",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:42 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database users commonly explore large data sets by
                 running aggregate queries that project the data down to
                 a smaller number of points and dimensions, and
                 visualizing the results. Often, such visualizations
                 will reveal outliers that correspond to errors or
                 surprising features of the input data set.
                 Unfortunately, databases and visualization systems do
                 not provide a way to work backwards from an outlier
                 point to the common properties of the (possibly many)
                 unaggregated input tuples that correspond to that
                 outlier. We propose Scorpion, a system that takes a set
                 of user-specified outlier points in an aggregate query
                 result as input and finds predicates that explain the
                 outliers in terms of properties of the input tuples
                 that are used to compute the selected outlier results.
                 Specifically, this explanation identifies predicates
                 that, when applied to the input data, cause the
                 outliers to disappear from the output. To find such
                 predicates, we develop a notion of influence of a
                 predicate on a given output, and design several
                 algorithms that efficiently search for maximum
                 influence predicates over the input data. We show that
                 these algorithms can quickly find outliers in two real
                 data sets (from a sensor deployment and a campaign
                 finance data set), and run orders of magnitude faster
                 than a naive search algorithm while providing
                 comparable quality on a synthetic data set.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gupta:2013:RTQ,
  author =       "Rajeev Gupta and Krithi Ramamritham and Mukesh
                 Mohania",
  title =        "Ratio threshold queries over distributed data
                 sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "8",
  pages =        "565--576",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:42 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Continuous aggregation queries over dynamic data are
                 used for real time decision making and timely business
                 intelligence. In this paper we consider queries where a
                 client wants to be notified if the ratio of two
                 aggregates over distributed data crosses a specified
                 threshold. Consider these scenarios: a mechanism
                 designed to defend against distributed denial of
                 service attacks may be triggered when the fraction of
                 packets arriving to a subnet is more than 5\% of the
                 total packets; or a distributed store chain withdraws
                 its discount on luxury goods when sales of luxury goods
                 constitute more than 20\% of the overall sales. The
                 challenge in executing such ratio threshold queries
                 (RTQs) lies in incurring the minimal amount of
                 communication necessary for propagation of updates from
                 data sources to the aggregator node where the client
                 query is executed. We address this challenge by
                 proposing schemes for converting the client ratio
                 threshold condition into conditions on individual
                 distributed data sources. Whenever the condition
                 associated with a source is violated, the source pushes
                 its data values to the aggregator, which in turn pulls
                 data values from other sources to determine whether the
                 client threshold condition is indeed violated. We
                 present algorithms to minimize the number of source
                 condition violations (i.e., the number of pushes) while
                 ensuring that no violation of the client threshold
                 condition is missed. Further, in case of a source
                 condition violation, we propose efficient selective
                 pulling algorithms for intelligently choosing
                 additional sources whose data should be pulled by the
                 aggregator. Using performance evaluation on synthetic
                 and real traces of data updates we show that our
                 algorithms result in up to an order of magnitude less
                 number of messages compared to existing approaches in
                 the literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deng:2013:CQR,
  author =       "Ting Deng and Wenfei Fan",
  title =        "On the complexity of query result diversification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "8",
  pages =        "577--588",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:42 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query result diversification is a bi-criteria
                 optimization problem for ranking query results. Given a
                 database $D$, a query $Q$ and a positive integer $k$,
                 it is to find a set of $k$ tuples from $ Q(D) $ such
                 that the tuples are as relevant as possible to the
                 query, and at the same time, as diverse as possible to
                 each other. Subsets of $ Q(D) $ are ranked by an
                 objective function defined in terms of relevance and
                 diversity. Query result diversification has found a
                 variety of applications in databases, information
                 retrieval and operations research. This paper studies
                 the complexity of result diversification for relational
                 queries. We identify three problems in connection with
                 query result diversification, to determine whether
                 there exists a set of $k$ tuples that is ranked above a
                 bound with respect to relevance and diversity, to
                 assess the rank of a given $k$-element set, and to
                 count how many $k$-element sets are ranked above a
                 given bound. We study these problems for a variety of
                 query languages and for three objective functions. We
                 establish the upper and lower bounds of these problems,
                 all matching, for both combined complexity and data
                 complexity. We also investigate several special
                 settings of these problems, identifying tractable
                 cases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dutta:2013:SQF,
  author =       "Sourav Dutta and Ankur Narang and Suman K. Bera",
  title =        "Streaming quotient filter: a near optimal approximate
                 duplicate detection approach for data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "8",
  pages =        "589--600",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:42 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The unparalleled growth and popularity of the Internet
                 coupled with the advent of diverse modern applications
                 such as search engines, on-line transactions, climate
                 warning systems, etc., has catered to an unprecedented
                 expanse in the volume of data stored world-wide.
                 Efficient storage, management, and processing of such
                 massively exponential amount of data has emerged as a
                 central theme of research in this direction. Detection
                 and removal of redundancies and duplicates in real-time
                 from such multi-trillion record-set to bolster resource
                 and compute efficiency constitutes a challenging area
                 of study. The infeasibility of storing the entire data
                 from potentially unbounded data streams, with the need
                 for precise elimination of duplicates calls for
                 intelligent approximate duplicate detection algorithms.
                 The literature hosts numerous works based on the
                 well-known probabilistic bitmap structure, Bloom Filter
                 and its variants. In this paper we propose a novel data
                 structure, Streaming Quotient Filter, (SQF) for
                 efficient detection and removal of duplicates in data
                 streams. SQF intelligently stores the signatures of
                 elements arriving on a data stream, and along with an
                 eviction policy provides near zero false positive and
                 false negative rates. We show that the near optimal
                 performance of SQF is achieved with a very low memory
                 requirement, making it ideal for real-time
                 memory-efficient de-duplication applications having an
                 extremely low false positive and false negative
                 tolerance rates. We present detailed theoretical
                 analysis of the working of SQF, providing a guarantee
                 on its performance. Empirically, we compare SQF to
                 alternate methods and show that the proposed method is
                 superior in terms of memory and accuracy compared to
                 the existing solutions. We also discuss Dynamic SQF for
                 evolving streams and the parallel implementation of
                 SQF.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Korn:2013:RSP,
  author =       "Flip Korn and Barna Saha and Divesh Srivastava and
                 Shanshan Ying",
  title =        "On repairing structural problems in semi-structured
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "601--612",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Semi-structured data such as XML are popular for data
                 interchange and storage. However, many XML documents
                 have improper nesting where open- and close-tags are
                 unmatched. Since some semi-structured data (e.g.,
                 Latex) have a flexible grammar and since many XML
                 documents lack an accompanying DTD or XSD, we focus on
                 computing a syntactic repair via the edit distance. To
                 solve this problem, we propose a dynamic programming
                 algorithm which takes cubic time. While this algorithm
                 is not scalable, well-formed substrings of the data can
                 be pruned to enable faster computation. Unfortunately,
                 there are still cases where the dynamic program could
                 be very expensive; hence, we give branch-and-bound
                 algorithms based on various combinations of two
                 heuristics, called MinCost and MaxBenefit, that trade
                 off between accuracy and efficiency. Finally, we
                 experimentally demonstrate the performance of these
                 algorithms on real data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Manshadi:2013:DAL,
  author =       "Faraz Makari Manshadi and Baruch Awerbuch and Rainer
                 Gemulla and Rohit Khandekar and Juli{\'a}n Mestre and
                 Mauro Sozio",
  title =        "A distributed algorithm for large-scale generalized
                 matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "613--624",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Generalized matching problems arise in a number of
                 applications, including computational advertising,
                 recommender systems, and trade markets. Consider, for
                 example, the problem of recommending multimedia items
                 (e.g., DVDs) to users such that (1) users are
                 recommended items that they are likely to be interested
                 in, (2) every user gets neither too few nor too many
                 recommendations, and (3) only items available in stock
                 are recommended to users. State-of-the-art matching
                 algorithms fail at coping with large real-world
                 instances, which may involve millions of users and
                 items. We propose the first distributed algorithm for
                 computing near-optimal solutions to large-scale
                 generalized matching problems like the one above. Our
                 algorithm is designed to run on a small cluster of
                 commodity nodes (or in a MapReduce environment), has
                 strong approximation guarantees, and requires only a
                 poly-logarithmic number of passes over the input. In
                 particular, we propose a novel distributed algorithm to
                 approximately solve mixed packing-covering linear
                 programs, which include but are not limited to
                 generalized matching problems. Experiments on
                 real-world and synthetic data suggest that a practical
                 variant of our algorithm scales to very large problem
                 sizes and can be orders of magnitude faster than
                 alternative approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Geerts:2013:LDC,
  author =       "Floris Geerts and Giansalvatore Mecca and Paolo
                 Papotti and Donatello Santoro",
  title =        "The {LLUNATIC} data-cleaning framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "625--636",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data-cleaning (or data-repairing) is considered a
                 crucial problem in many database-related tasks. It
                 consists in making a database consistent with respect
                 to a set of given constraints. In recent years,
                 repairing methods have been proposed for several
                 classes of constraints. However, these methods rely on
                 ad hoc decisions and tend to hard-code the strategy to
                 repair conflicting values. As a consequence, there is
                 currently no general algorithm to solve database
                 repairing problems that involve different kinds of
                 constraints and different strategies to select
                 preferred values. In this paper we develop a uniform
                 framework to solve this problem. We propose a new
                 semantics for repairs, and a chase-based algorithm to
                 compute minimal solutions. We implemented the framework
                 in a DBMS-based prototype, and we report experimental
                 results that confirm its good scalability and superior
                 quality in computing repairs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Psaroudakis:2013:SDW,
  author =       "Iraklis Psaroudakis and Manos Athanassoulis and
                 Anastasia Ailamaki",
  title =        "Sharing data and work across concurrent analytical
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "637--648",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today's data deluge enables organizations to collect
                 massive data, and analyze it with an ever-increasing
                 number of concurrent queries. Traditional data
                 warehouses (DW) face a challenging problem in executing
                 this task, due to their query-centric model: each query
                 is optimized and executed independently. This model
                 results in high contention for resources. Thus, modern
                 DW depart from the query-centric model to execution
                 models involving sharing of common data and work. Our
                 goal is to show when and how a DW should employ
                 sharing. We evaluate experimentally two sharing
                 methodologies, based on their original prototype
                 systems, that exploit work sharing opportunities among
                 concurrent queries at run-time: Simultaneous Pipelining
                 (SP), which shares intermediate results of common
                 sub-plans, and Global Query Plans (GQP), which build
                 and evaluate a single query plan with shared operators.
                 First, after a short review of sharing methodologies,
                 we show that SP and GQP are orthogonal techniques. SP
                 can be applied to shared operators of a GQP, reducing
                 response times by 20\%--48\% in workloads with numerous
                 common sub-plans. Second, we corroborate previous
                 results on the negative impact of SP on performance for
                 cases of low concurrency. We attribute this behavior to
                 a bottleneck caused by the push-based communication
                 model of SP. We show that pull-based communication for
                 SP eliminates the overhead of sharing altogether for
                 low concurrency, and scales better on multi-core
                 machines than push-based SP, further reducing response
                 times by 82\%--86\% for high concurrency. Third, we
                 perform an experimental analysis of SP, GQP and their
                 combination, and show when each one is beneficial. We
                 identify a trade-off between low and high concurrency.
                 In the former case, traditional query-centric operators
                 with SP perform better, while in the latter case, GQP
                 with shared operators enhanced by SP give the best
                 results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shang:2013:SOA,
  author =       "Haichuan Shang and Masaru Kitsuregawa",
  title =        "Skyline operator on anti-correlated distributions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "649--660",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding the skyline in a multi-dimensional space is
                 relevant to a wide range of applications. The skyline
                 operator over a set of $d$-dimensional points selects
                 the points that are not dominated by any other point on
                 all dimensions. Therefore, it provides a minimal set of
                 candidates for the users to make their personal
                 trade-off among all optimal solutions. The existing
                 algorithms establish both the worst case complexity by
                 discarding distributions and the average case
                 complexity by assuming dimensional independence.
                 However, the data in the real world is more likely to
                 be anti-correlated. The cardinality and complexity
                 analysis on dimensionally independent data is
                 meaningless when dealing with anti-correlated data.
                 Furthermore, the performance of the existing algorithms
                 becomes impractical on anti-correlated data. In this
                 paper, we establish a cardinality model for
                 anti-correlated distributions. We propose an accurate
                 polynomial estimation for the expected value of the
                 skyline cardinality. Because the high skyline
                 cardinality downgrades the performance of most existing
                 algorithms on anti-correlated data, we further develop
                 a determination and elimination framework which extends
                 the well-adopted elimination strategy. It achieves
                 remarkable effectiveness and efficiency. The
                 comprehensive experiments on both real datasets and
                 benchmark synthetic datasets demonstrate that our
                 approach significantly outperforms the state-of-the-art
                 algorithms under a wide range of settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mahmoud:2013:LLM,
  author =       "Hatem Mahmoud and Faisal Nawab and Alexander Pucher
                 and Divyakant Agrawal and Amr {El Abbadi}",
  title =        "Low-latency multi-datacenter databases using
                 replicated commit",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "661--672",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Web service providers have been using NoSQL datastores
                 to provide scalability and availability for globally
                 distributed data at the cost of sacrificing
                 transactional guarantees. Recently, major web service
                 providers like Google have moved towards building
                 storage systems that provide ACID transactional
                 guarantees for globally distributed data. For example,
                 the newly published system, Spanner, uses Two-Phase
                 Commit and Two-Phase Locking to provide atomicity and
                 isolation for globally distributed data, running on top
                 of Paxos to provide fault-tolerant log replication. We
                 show in this paper that it is possible to provide the
                 same ACID transactional guarantees for multi-datacenter
                 databases with fewer cross-datacenter communication
                 trips, compared to replicated logging. Instead of
                 replicating the transactional log, we replicate the
                 commit operation itself, by running Two-Phase Commit
                 multiple times in different datacenters and using Paxos
                 to reach consensus among datacenters as to whether the
                 transaction should commit. Doing so not only replaces
                 several inter-datacenter communication trips with
                 intra-datacenter communication trips, but also allows
                 us to integrate atomic commitment and isolation
                 protocols with consistent replication protocols to
                 further reduce the number of cross-datacenter
                 communication trips needed for consistent replication;
                 for example, by eliminating the need for an election
                 phase in Paxos. We analyze our approach in terms of
                 communication trips to compare it against the log
                 replication approach, then we conduct an extensive
                 experimental study to compare the performance and
                 scalability of both approaches under various
                 multi-datacenter setups.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chi:2013:DBQ,
  author =       "Yun Chi and Hakan Hac{\'\i}g{\"u}m{\"u}s and Wang-Pin
                 Hsiung and Jeffrey F. Naughton",
  title =        "Distribution-based query scheduling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "673--684",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query scheduling, a fundamental problem in database
                 management systems, has recently received a renewed
                 attention, perhaps in part due to the rise of the
                 ``database as a service'' (DaaS) model for database
                 deployment. While there has been a great deal of work
                 investigating different scheduling algorithms, there
                 has been comparatively little work investigating what
                 the scheduling algorithms can or should know about the
                 queries to be scheduled. In this work, we investigate
                 the efficacy of using histograms describing the
                 distribution of likely query execution times as input
                 to the query scheduler. We propose a novel
                 distribution-based scheduling algorithm, Shepherd, and
                 show that Shepherd substantially outperforms
                 state-of-the-art point-based methods through extensive
                 experimentation with both synthetic and TPC
                 workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2013:MQT,
  author =       "Wenfei Fan and Floris Geerts and Frank Neven",
  title =        "Making queries tractable on big data with
                 preprocessing: through the eyes of complexity theory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "685--696",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A query class is traditionally considered tractable if
                 there exists a polynomial-time (PTIME) algorithm to
                 answer its queries. When it comes to big data, however,
                 PTIME algorithms often become infeasible in practice. A
                 traditional and effective approach to coping with this
                 is to preprocess data off-line, so that queries in the
                 class can be subsequently evaluated on the data
                 efficiently. This paper aims to provide a formal
                 foundation for this approach in terms of computational
                 complexity. (1) We propose a set of $ \Pi $-tractable
                 queries, denoted by $ \Pi T Q^0 $, to characterize
                 classes of queries that can be answered in parallel
                 poly-logarithmic time (NC) after PTIME preprocessing.
                 (2) We show that several natural query classes are $
                 \Pi $-tractable and are feasible on big data. (3) We
                 also study a set $ \Pi T Q $ of query classes that can
                 be effectively converted to $ \Pi $-tractable queries
                 by refactorizing its data and queries for
                 preprocessing. We introduce a form of NC reductions to
                 characterize such conversions. (4) We show that a
                 natural query class is complete for $ \Pi T Q $. (5) We
                 also show that $ \Pi T Q^0 \subset P $ unless $ P =
                 {\rm NC} $, i.e., the set $ \Pi T Q^0 $ of all $ \Pi
                 $-tractable queries is properly contained in the set
                 $P$ of all PTIME queries. Nonetheless, $ \Pi T Q = P $,
                 i.e., all PTIME query classes can be made $ \Pi
                 $-tractable via proper refactorizations. This work is a
                 step towards understanding the tractability of queries
                 in the context of big data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kaplan:2013:APQ,
  author =       "Haim Kaplan and Ilia Lotosh and Tova Milo and Slava
                 Novgorodov",
  title =        "Answering planning queries with the crowd",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "697--708",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent research has shown that crowd sourcing can be
                 used effectively to solve problems that are difficult
                 for computers, e.g., optical character recognition and
                 identification of the structural configuration of
                 natural proteins. In this paper we propose to use the
                 power of the crowd to address yet another difficult
                 problem that frequently occurs in a daily life ---
                 answering planning queries whose output is a sequence
                 of objects/actions, when the goal, i.e., the notion of
                 ``best output'', is hard to formalize. For example,
                 planning the sequence of places/attractions to visit in
                 the course of a vacation, where the goal is to enjoy
                 the resulting vacation the most, or planning the
                 sequence of courses to take in an academic schedule
                 planning, where the goal is to obtain solid knowledge
                 of a given subject domain. Such goals may be easily
                 understandable by humans, but hard or even impossible
                 to formalize for a computer. We present a novel
                 algorithm for efficiently harnessing the crowd to
                 assist in answering such planning queries. The
                 algorithm builds the desired plans incrementally,
                 choosing at each step the ``best'' questions so that the
                 overall number of questions that need to be asked is
                 minimized. We prove the algorithm to be optimal within
                 its class and demonstrate experimentally its
                 effectiveness and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Heimel:2013:HOP,
  author =       "Max Heimel and Michael Saecker and Holger Pirk and
                 Stefan Manegold and Volker Markl",
  title =        "Hardware-oblivious parallelism for in-memory
                 column-stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "709--720",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The multi-core architectures of today's computer
                 systems make parallelism a necessity for performance
                 critical applications. Writing such applications in a
                 generic, hardware-oblivious manner is a challenging
                 problem: Current database systems thus rely on
                 labor-intensive and error-prone manual tuning to
                 exploit the full potential of modern parallel hardware
                 architectures like multi-core CPUs and graphics cards.
                 We propose an alternative design for a parallel
                 database engine, based on a single set of
                 hardware-oblivious operators, which are compiled down
                 to the actual hardware at runtime. This design reduces
                 the development overhead for parallel database engines,
                 while achieving competitive performance to hand-tuned
                 systems. We provide a proof-of-concept for this design
                 by integrating operators written using the parallel
                 programming framework OpenCL into the open-source
                 database MonetDB. Following this approach, we achieve
                 efficient, yet highly portable parallel code without
                 the need for optimization by hand. We evaluated our
                 implementation against MonetDB using TPC-H derived
                 queries and observed a performance that rivals that of
                 MonetDB's query execution on the CPU and surpasses it
                 on the GPU. In addition, we show that the same set of
                 operators runs nearly unchanged on a GPU, demonstrating
                 the feasibility of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Thonangi:2013:PDR,
  author =       "Risi Thonangi and Jun Yang",
  title =        "Permuting data on random-access block storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "721--732",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Permutation is a fundamental operator for array data,
                 with applications in, for example, changing matrix
                 layouts and reorganizing data cubes. We consider the
                 problem of permuting large quantities of data stored on
                 secondary storage that supports fast random block
                 accesses, such as solid state drives and distributed
                 key--value stores. Faster random accesses open up
                 interesting new opportunities for permutation. While
                 external merge sort has often been used for
                 permutation, it is an overkill that fails to exploit
                 the property of permutation fully and carries
                 unnecessary overhead in storing and comparing keys. We
                 propose faster algorithms with lower memory
                 requirements for a large, useful class of permutations.
                 We also tackle practical challenges that traditional
                 permutation algorithms have not dealt with, such as
                 exploiting random block accesses more aggressively,
                 considering the cost asymmetry between reads and
                 writes, and handling arbitrary data dimension sizes (as
                 opposed to perfect powers often assumed by previous
                 work). As a result, our algorithms are faster and more
                 broadly applicable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Stoica:2013:IFW,
  author =       "Radu Stoica and Anastasia Ailamaki",
  title =        "Improving flash write performance by using update
                 frequency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "733--744",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Solid-state drives (SSDs) are quickly becoming the
                 default storage medium as the cost of NAND flash memory
                 continues to drop. However, flash memory introduces new
                  challenges, as data cannot be efficiently updated
                 in-place. To overcome the technology's limitations,
                 SSDs incorporate a software Flash Translation Layer
                 (FTL) that implements out-of-place updates, typically
                 by storing data in a log-structured fashion. Despite a
                 large number of existing FTL algorithms, SSD
                 performance, predictability, and lifetime remain an
                 issue, especially for the write-intensive workloads
                 specific to database applications. In this paper, we
                 show how to design FTLs that are more efficient by
                 using the I/O write skew to guide data placement on
                 flash memory. We model the relationship between data
                 placement and write performance for basic I/O write
                 patterns and detail the most important concepts of
                  writing to flash memory: (i) the trade-off between the
                 extra capacity available and write overhead, (ii) the
                 benefit of adapting data placement to write skew, (iii)
                 the impact of the cleaning policy, and (iv) how to
                 estimate the best achievable write performance for a
                 given I/O workload. Based on the findings of the
                 theoretical model, we propose a new principled data
                 placement algorithm that can be incorporated into
                 existing FTLs. We show the benefits of our data
                 placement algorithm when running micro-benchmarks and
                 real database I/O traces: our data placement algorithm
                 reduces write overhead by 20\%--75\% when compared to
                 state-of-art techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2013:EID,
  author =       "Lu Li and Chee-Yong Chan",
  title =        "Efficient indexing for diverse query results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "745--756",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper examines the problem of computing diverse
                 query results which is useful for browsing search
                 results in online shopping applications. The search
                 results are diversified wrt a sequence of output
                 attributes (termed $d$-order) where an attribute that
                 appears earlier in the $d$-order has higher priority
                 for diversification. We present a new indexing
                 technique, $D$-Index, to efficiently compute diverse
                 query results for queries with static or dynamic
                 $d$-orders. Our performance evaluation demonstrates
                 that our $D$-Index outperforms the state-of-the-art
                 techniques developed for queries with static or dynamic
                 $d$-orders.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2013:RUS,
  author =       "Chen Jason Zhang and Lei Chen and H. V. Jagadish and
                 Chen Caleb Cao",
  title =        "Reducing uncertainty of schema matching via
                 crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "757--768",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Schema matching is a central challenge for data
                 integration systems. Automated tools are often
                 uncertain about schema matchings they suggest, and this
                 uncertainty is inherent since it arises from the
                 inability of the schema to fully capture the semantics
                 of the represented data. Human common sense can often
                 help. Inspired by the popularity and the success of
                 easily accessible crowdsourcing platforms, we explore
                 the use of crowdsourcing to reduce the uncertainty of
                 schema matching. Since it is typical to ask simple
                 questions on crowdsourcing platforms, we assume that
                 each question, namely Correspondence Correctness
                 Question (CCQ), is to ask the crowd to decide whether a
                 given correspondence should exist in the correct
                 matching. We propose frameworks and efficient
                 algorithms to dynamically manage the CCQs, in order to
                 maximize the uncertainty reduction within a limited
                 budget of questions. We develop two novel approaches,
                 namely ``Single CCQ'' and ``Multiple CCQ'', which
                 adaptively select, publish and manage the questions. We
                 verified the value of our solutions with simulation and
                 real implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2013:TCI,
  author =       "Bin Yang and Chenjuan Guo and Christian S. Jensen",
  title =        "Travel cost inference from sparse, spatio temporally
                 correlated time series using {Markov} models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "769--780",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The monitoring of a system can yield a set of
                 measurements that can be modeled as a collection of
                 time series. These time series are often sparse, due to
                 missing measurements, and spatiotemporally correlated,
                 meaning that spatially close time series exhibit
                 temporal correlation. The analysis of such time series
                 offers insight into the underlying system and enables
                 prediction of system behavior. While the techniques
                 presented in the paper apply more generally, we
                 consider the case of transportation systems and aim to
                 predict travel cost from GPS tracking data from probe
                 vehicles. Specifically, each road segment has an
                 associated travel-cost time series, which is derived
                 from GPS data. We use spatio-temporal hidden Markov
                 models (STHMM) to model correlations among different
                 traffic time series. We provide algorithms that are
                 able to learn the parameters of an STHMM while
                 contending with the sparsity, spatio-temporal
                 correlation, and heterogeneity of the time series.
                 Using the resulting STHMM, near future travel costs in
                 the transportation network, e.g., travel time or
                 greenhouse gas emissions, can be inferred, enabling a
                 variety of routing services, e.g., eco-routing.
                 Empirical studies with a substantial GPS data set offer
                 insight into the design properties of the proposed
                 framework and algorithms, demonstrating the
                 effectiveness and efficiency of travel cost
                 inferencing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Park:2013:QOC,
  author =       "Hyunjung Park and Jennifer Widom",
  title =        "Query optimization over crowdsourced data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "781--792",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Deco is a comprehensive system for answering
                 declarative queries posed over stored relational data
                 together with data obtained on-demand from the crowd.
                 In this paper we describe Deco's cost-based query
                 optimizer, building on Deco's data model, query
                 language, and query execution engine presented earlier.
                 Deco's objective in query optimization is to find the
                 best query plan to answer a query, in terms of
                 estimated monetary cost. Deco's query semantics and
                 plan execution strategies require several fundamental
                 changes to traditional query optimization. Novel
                 techniques incorporated into Deco's query optimizer
                 include a cost model distinguishing between ``free''
                 existing data versus paid new data, a cardinality
                 estimation algorithm coping with changes to the
                 database state during query execution, and a plan
                 enumeration algorithm maximizing reuse of common
                 subplans in a setting that makes reuse challenging. We
                 experimentally evaluate Deco's query optimizer,
                 focusing on the accuracy of cost estimation and the
                 efficiency of plan enumeration.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2013:DAD,
  author =       "Yang Wang and Peng Wang and Jian Pei and Wei Wang and
                 Sheng Huang",
  title =        "A data-adaptive and dynamic segmentation index for
                 whole matching on time series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "793--804",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Similarity search on time series is an essential
                 operation in many applications. In the state-of-the-art
                 methods, such as the R-tree based methods, SAX and
                 iSAX, time series are by default divided into
                 equi-length segments globally, that is, all time series
                 are segmented in the same way. Those methods then focus
                 on how to approximate or symbolize the segments and
                 construct indexes. In this paper, we make an important
                 observation: global segmentation of all time series may
                 incur unnecessary cost in space and time for indexing
                 time series. We develop DSTree, a data adaptive and
                 dynamic segmentation index on time series. In addition
                 to savings in space and time, our new index can provide
                 tight upper and lower bounds on distances between time
                 series. An extensive empirical study shows that our new
                 index DSTree supports time series similarity search
                 effectively and efficiently.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bronzi:2013:EIP,
  author =       "Mirko Bronzi and Valter Crescenzi and Paolo Merialdo
                 and Paolo Papotti",
  title =        "Extraction and integration of partially overlapping
                 web sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "805--816",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present an unsupervised approach for harvesting the
                 data exposed by a set of structured and partially
                 overlapping data-intensive web sources. Our proposal
                 comes within a formal framework tackling two problems:
                 the data extraction problem, to generate extraction
                 rules based on the input websites, and the data
                 integration problem, to integrate the extracted data in
                 a unified schema. We introduce an original algorithm,
                 WEIR, to solve the stated problems and formally prove
                 its correctness. WEIR leverages the overlapping data
                 among sources to make better decisions both in the data
                 extraction (by pruning rules that do not lead to
                 redundant information) and in the data integration (by
                 reflecting local properties of a source over the
                 mediated schema). Along the way, we characterize the
                 amount of redundancy needed by our algorithm to produce
                 a solution, and present experimental results to show
                 the benefits of our approach with respect to existing
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yuan:2013:YYP,
  author =       "Yuan Yuan and Rubao Lee and Xiaodong Zhang",
  title =        "The {Yin} and {Yang} of processing data warehousing
                 queries on {GPU} devices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "817--828",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database community has made significant research
                 efforts to optimize query processing on GPUs in the
                 past few years. However, we can hardly find that GPUs
                 have been truly adopted in major warehousing production
                 systems. Preparing to merge GPUs to the warehousing
                 systems, we have identified and addressed several
                 critical issues in a three-dimensional study of
                 warehousing queries on GPUs by varying query
                 characteristics, software techniques, and GPU hardware
                 configurations. We also propose an analytical model to
                 understand and predict the query performance on GPUs.
                 Based on our study, we present our performance insights
                 for warehousing query execution on GPUs. The objective
                 of our work is to provide a comprehensive guidance for
                 GPU architects, software system designers, and database
                 practitioners to narrow the speed gap between the GPU
                 kernel execution (the fast mode) and data transfer to
                 prepare GPU execution (the slow mode) for high
                 performance in processing data warehousing queries. The
                 GPU query engine developed in this work is open source
                 to the public.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yuan:2013:MIG,
  author =       "Dayu Yuan and Prasenjit Mitra and C. Lee Giles",
  title =        "Mining and indexing graphs for supergraph search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "829--840",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study supergraph search (SPS), that is, given a
                 query graph $q$ and a graph database $G$ that contains
                 a collection of graphs, return graphs that have $q$ as
                 a supergraph from $G$. SPS has broad applications in
                 bioinformatics, cheminformatics and other scientific
                 and commercial fields. Determining whether a graph is a
                 subgraph (or supergraph) of another is an NP-complete
                 problem. Hence, it is intractable to compute SPS for
                 large graph databases. Two separate indexing methods, a
                 ``filter + verify''-based method and a
                 ``prefix-sharing''-based method, have been studied to
                 efficiently compute SPS. To implement the above two
                 methods, subgraph patterns are mined from the graph
                 database to build an index. Those subgraphs are mined
                 to optimize either the filtering gain or the
                 prefix-sharing gain. However, no single subgraph-mining
                 algorithm considers both gains. This work is the first
                 one to mine subgraphs to optimize both the filtering
                 gain and the prefix-sharing gain while processing SPS
                 queries. First, we show that the subgraph-mining
                 problem is NP-hard. Then, we propose two
                 polynomial-time algorithms to solve the problem with an
                 approximation ratio of $ 1 - 1 / e $ and $ 1 / 4 $
                 respectively. In addition, we construct a lattice-like
                 index, LW-index, to organize the selected subgraph
                 patterns for fast index-lookup. Our experiments show
                 that our approach improves the query processing time
                 for SPS queries by a factor of 3 to 10.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2013:ERM,
  author =       "Jianmin Wang and Shaoxu Song and Xiaochen Zhu and
                 Xuemin Lin",
  title =        "Efficient recovery of missing events",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "841--852",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "For various entering and transmission issues raised by
                 human or system, missing events often occur in event
                 data, which record execution logs of business
                 processes. Without recovering these missing events,
                 applications such as provenance analysis or complex
                 event processing built upon event data are not
                 reliable. Following the minimum change discipline in
                 improving data quality, it is also rational to find a
                 recovery that minimally differs from the original data.
                 Existing recovery approaches fall short of efficiency
                 owing to enumerating and searching over all the
                 possible sequences of events. In this paper, we study
                 the efficient techniques for recovering missing events.
                 According to our theoretical results, the recovery
                 problem is proved to be NP-hard. Nevertheless, we are
                 able to concisely represent the space of event
                 sequences in a branching framework. Advanced indexing
                 and pruning techniques are developed to further improve
                 the recovery efficiency. Our proposed efficient
                 techniques make it possible to find top-$k$ recoveries.
                 The experimental results demonstrate that our minimum
                 recovery approach achieves high accuracy, and
                 significantly outperforms the state-of-the-art
                 technique for up to 5 orders of magnitudes improvement
                 in time performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2013:HAA,
  author =       "Kai Ren and YongChul Kwon and Magdalena Balazinska and
                 Bill Howe",
  title =        "{Hadoop}'s adolescence: an analysis of {Hadoop} usage
                 in scientific workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "853--864",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We analyze Hadoop workloads from three di{\-}fferent
                 research clusters from a user-centric perspective. The
                 goal is to better understand data scientists' use of
                 the system and how well the use of the system matches
                 its design. Our analysis suggests that Hadoop usage is
                 still in its adolescence. We see underuse of Hadoop
                 features, extensions, and tools. We see significant
                 diversity in resource usage and application styles,
                 including some interactive and iterative workloads,
                 motivating new tools in the ecosystem. We also observe
                 significant opportunities for optimizations of these
                 workloads. We find that job customization and
                 configuration are used in a narrow scope, suggesting
                 the future pursuit of automatic tuning systems.
                 Overall, we present the first user-centered measurement
                 study of Hadoop and find significant opportunities for
                 improving its efficient use for data scientists.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mansour:2013:RSE,
  author =       "Essam Mansour and Ahmed El-Roby and Panos Kalnis and
                 Aron Ahmadia and Ashraf Aboulnaga",
  title =        "{RACE}: a scalable and elastic parallel system for
                 discovering repeats in very long sequences",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "865--876",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A wide range of applications, including
                 bioinformatics, time series, and log analysis, depend
                 on the identification of repetitions in very long
                 sequences. The problem of finding maximal pairs
                 subsumes most important types of repetition-finding
                 tasks. Existing solutions require both the input
                 sequence and its index (typically an order of magnitude
                 larger than the input) to fit in memory. Moreover, they
                 are serial algorithms with long execution time.
                 Therefore, they are limited to small datasets, despite
                 the fact that modern applications demand orders of
                 magnitude longer sequences. In this paper we present
                 RACE, a parallel system for finding maximal pairs in
                 very long sequences. RACE supports parallel execution
                 on stand-alone multicore systems, in addition to
                 scaling to thousands of nodes on clusters or
                 supercomputers. RACE does not require the input or the
                 index to fit in memory; therefore, it supports very
                 long sequences with limited memory. Moreover, it uses a
                 novel array representation that allows for
                 cache-efficient implementation. RACE is particularly
                 suitable for the cloud (e.g., Amazon EC2) because,
                 based on availability, it can scale elastically to more
                 or fewer machines during its execution. Since scaling
                 out introduces overheads, mainly due to load imbalance,
                 we propose a cost model to estimate the expected
                 speedup, based on statistics gathered through sampling.
                 The model allows the user to select the appropriate
                 combination of cloud resources based on the provider's
                 prices and the required deadline. We conducted
                 extensive experimental evaluation with large real
                 datasets and large computing infrastructures. In
                 contrast to existing methods, RACE can handle the
                 entire human genome on a typical desktop computer with
                 16GB RAM. Moreover, for a problem that takes 10 hours
                 of serial execution, RACE finishes in 28 seconds using
                 2,048 nodes on an IBM BlueGene/P supercomputer.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% LLAMA: latch-free cache/storage subsystem for the Bw-tree.
%%% PVLDB 6(10), pp. 877--888, August 2013.
@Article{Levandoski:2013:LCS,
  author =       "Justin Levandoski and David Lomet and Sudipta
                 Sengupta",
  title =        "{LLAMA}: a cache\slash storage subsystem for modern
                 hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "877--888",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "LLAMA is a subsystem designed for new hardware
                 environments that supports an API for page-oriented
                 access methods, providing both cache and storage
                 management. Caching (CL) and storage (SL) layers use a
                 common mapping table that separates a page's logical
                 and physical location. CL supports data updates and
                 management updates (e.g., for index re-organization)
                 via latch-free compare-and-swap atomic state changes on
                 its mapping table. SL uses the same mapping table to
                 cope with page location changes produced by log
                 structuring on every page flush. To demonstrate LLAMA's
                 suitability, we tailored our latch-free Bw-tree
                 implementation to use LLAMA. The Bw-tree is a B-tree
                 style index. Layered on LLAMA, it has higher
                 performance and scalability using real workloads
                 compared with BerkeleyDB's B-tree, which is known for
                 good performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Hash-join co-processing on coupled CPU--GPU (AMD APU) architectures.
%%% PVLDB 6(10), pp. 889--900, August 2013.  Also indexed in hash.bib.
@Article{He:2013:RCP,
  author =       "Jiong He and Mian Lu and Bingsheng He",
  title =        "Revisiting co-processing for hash joins on the coupled
                 {CPU--GPU} architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "889--900",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query co-processing on graphics processors (GPUs) has
                 become an effective means to improve the performance of
                 main memory databases. However, the relatively low
                 bandwidth and high latency of the PCI-e bus are usually
                 bottleneck issues for co-processing. Recently, coupled
                 CPU-GPU architectures have received a lot of attention,
                 e.g. AMD APUs with the CPU and the GPU integrated into
                 a single chip. That opens up new opportunities for
                 optimizing query co-processing. In this paper, we
                 experimentally revisit hash joins, one of the most
                 important join algorithms for main memory databases, on
                 a coupled CPU-GPU architecture. Particularly, we study
                 the fine-grained co-processing mechanisms on hash joins
                 with and without partitioning. The co-processing
                 outlines an interesting design space. We extend
                 existing cost models to automatically guide decisions
                 on the design space. Our experimental results on a
                 recent AMD APU show that (1) the coupled architecture
                 enables fine-grained co-processing and cache reuses,
                 which are inefficient on discrete CPU-GPU
                 architectures; (2) the cost model can automatically
                 guide the design and tuning knobs in the design space;
                 (3) fine-grained co-processing achieves up to 53\%,
                 35\% and 28\% performance improvement over CPU-only,
                 GPU-only and conventional CPU-GPU co-processing,
                 respectively. We believe that the insights and
                 implications from this study are initial yet important
                 for further research on query co-processing on coupled
                 CPU-GPU architectures.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Top-k nearest keyword (k-NK) search on large graphs via distance
%%% oracles and shortest-path trees.  PVLDB 6(10), pp. 901--912,
%%% August 2013.  Math in title/abstract is braced for BibTeX styles.
@Article{Qiao:2013:TKN,
  author =       "Miao Qiao and Lu Qin and Hong Cheng and Jeffrey Xu Yu
                 and Wentao Tian",
  title =        "Top-$k$ nearest keyword search on large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "901--912",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "It is quite common for networks emerging nowadays to
                 have labels or textual contents on the nodes. On such
                 networks, we study the problem of top-$k$ nearest
                 keyword ($k$-NK) search. In a network $G$ modeled as an
                 undirected graph, each node is attached with zero or
                 more keywords, and each edge is assigned with a weight
                 measuring its length. Given a query node $q$ in $G$ and
                 a keyword $ \lambda $, a $k$-NK query seeks $k$ nodes
                 which contain $ \lambda $ and are nearest to $q$.
                 $k$-NK is not only useful as a stand-alone query but
                 also as a building block for tackling complex graph
                 pattern matching problems. The key to an accurate
                 $k$-NK result is a precise shortest distance estimation
                 in a graph. Based on the latest distance oracle
                 technique, we build a shortest path tree for a distance
                 oracle and use the tree distance as a more accurate
                 estimation. With such representation, the original
                 $k$-NK query on a graph can be reduced to answering the
                 query on a set of trees and then assembling the results
                 obtained from the trees. We propose two efficient
                 algorithms to report the exact $k$-NK result on a tree.
                 One is query time optimized for a scenario when a small
                 number of result nodes are of interest to users. The
                 other handles $k$-NK queries for an arbitrarily large
                 $k$ efficiently. In obtaining a $k$-NK result on a
                 graph from that on trees, a global storage technique is
                 proposed to further reduce the index size and the query
                 time. Extensive experimental results conform with our
                 theoretical findings, and demonstrate the effectiveness
                 and efficiency of our $k$-NK algorithms on large real
                 graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Geo-social network (GeoSN) query processing framework.
%%% PVLDB 6(10), pp. 913--924, August 2013.
%%% NOTE(review): corrected "mobile devises" to "mobile devices" in the
%%% abstract; verify against the published PVLDB abstract.
@Article{Armenatzoglou:2013:GFG,
  author =       "Nikos Armenatzoglou and Stavros Papadopoulos and
                 Dimitris Papadias",
  title =        "A general framework for geo-social query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "913--924",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The proliferation of GPS-enabled mobile devices and
                 the popularity of social networking have recently led
                 to the rapid growth of Geo-Social Networks (GeoSNs).
                 GeoSNs have created a fertile ground for novel
                 location-based social interactions and advertising.
                 These can be facilitated by GeoSN queries, which
                 extract useful information combining both the social
                 relationships and the current location of the users.
                 This paper constitutes the first systematic work on
                 GeoSN query processing. We propose a general framework
                 that offers flexible data management and algorithmic
                 design. Our architecture segregates the social,
                 geographical and query processing modules. Each GeoSN
                 query is processed via a transparent combination of
                 primitive queries issued to the social and geographical
                 modules. We demonstrate the power of our framework by
                 introducing several ``basic'' and ``advanced'' query
                 types, and devising various solutions for each type.
                 Finally, we perform an exhaustive experimental
                 evaluation with real and synthetic datasets, based on
                 realistic implementations with both commercial software
                 (such as MongoDB) and state-of-the-art research
                 methods. Our results confirm the viability of our
                 framework in typical large-scale GeoSNs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Analytic-model-based query execution time prediction for concurrent,
%%% dynamic workloads.  PVLDB 6(10), pp. 925--936, August 2013.
%%% Accented author name uses BibTeX special characters
%%% ({\'\i}, {\"u}) so that sorting and labeling work correctly.
@Article{Wu:2013:TPQ,
  author =       "Wentao Wu and Yun Chi and Hakan Hac{\'\i}g{\"u}m{\"u}s
                 and Jeffrey F. Naughton",
  title =        "Towards predicting query execution time for concurrent
                 and dynamic database workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "925--936",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Predicting query execution time is crucial for many
                 database management tasks including admission control,
                 query scheduling, and progress monitoring. While a
                 number of recent papers have explored this problem, the
                 bulk of the existing work either considers prediction
                 for a single query, or prediction for a static workload
                 of concurrent queries, where by ``static'' we mean that
                 the queries to be run are fixed and known. In this
                 paper, we consider the more general problem of dynamic
                 concurrent workloads. Unlike most previous work on
                 query execution time prediction, our proposed framework
                 is based on analytic modeling rather than machine
                 learning. We first use the optimizer's cost model to
                 estimate the I/O and CPU requirements for each pipeline
                 of each query in isolation, and then use a combination
                 queueing model and buffer pool model that merges the
                 I/O and CPU requests from concurrent queries to predict
                 running times. We compare the proposed approach with a
                 machine-learning based approach that is a variant of
                 previous work. Our experiments show that our
                 analytic-model based approach can lead to competitive
                 and often better prediction accuracy than its
                 machine-learning based counterpart.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Sketch-based geometric monitoring of distributed stream queries.
%%% PVLDB 6(10), pp. 937--948, August 2013.
@Article{Garofalakis:2013:SBG,
  author =       "Minos Garofalakis and Daniel Keren and Vasilis
                 Samoladas",
  title =        "Sketch-based geometric monitoring of distributed
                 stream queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "937--948",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Emerging large-scale monitoring applications rely on
                 continuous tracking of complex data-analysis queries
                 over collections of massive, physically-distributed
                 data streams. Thus, in addition to the space- and
                 time-efficiency requirements of conventional stream
                 processing (at each remote monitor site), effective
                 solutions also need to guarantee communication
                 efficiency (over the underlying communication network).
                 The complexity of the monitored query adds to the
                 difficulty of the problem --- this is especially true
                 for nonlinear queries (e.g., joins), where no obvious
                 solutions exist for distributing the monitor condition
                 across sites. The recently proposed geometric method
                 offers a generic methodology for splitting an arbitrary
                 (non-linear) global threshold-monitoring task into a
                 collection of local site constraints; still, the
                 approach relies on maintaining the complete stream(s)
                 at each site, thus raising serious efficiency concerns
                 for massive data streams. In this paper, we propose
                 novel algorithms for efficiently tracking a broad class
                 of complex aggregate queries in such
                 distributed-streams settings. Our tracking schemes rely
                 on a novel combination of the geometric method with
                 compact sketch summaries of local data streams, and
                 maintain approximate answers with provable error
                 guarantees, while optimizing space and processing costs
                 at each remote site and communication cost across the
                 network. One of our key technical insights for the
                 effective use of the geometric method lies in
                 exploiting a much lower-dimensional space for
                 monitoring the sketch-based estimation query. Due to
                 the complex, highly nonlinear nature of these
                 estimates, efficiently monitoring the local geometric
                 constraints poses challenging algorithmic issues for
                 which we propose novel solutions. Experimental results
                 on real-life data streams verify the effectiveness of
                 our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Direction-preserving trajectory simplification.
%%% PVLDB 6(10), pp. 949--960, August 2013.
@Article{Long:2013:DPT,
  author =       "Cheng Long and Raymond Chi-Wing Wong and H. V.
                 Jagadish",
  title =        "Direction-preserving trajectory simplification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "10",
  pages =        "949--960",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:50 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Trajectories of moving objects are collected in many
                 applications. Raw trajectory data is typically very
                 large, and has to be simplified before use. In this
                 paper, we introduce the notion of direction-preserving
                 trajectory simplification, and show both analytically
                 and empirically that it can support a broader range of
                 applications than traditional position-preserving
                 trajectory simplification. We present a polynomial-time
                 algorithm for optimal direction-preserving
                 simplification, and another approximate algorithm with
                 a quality guarantee. Extensive experimental evaluation
                 with real trajectory data shows the benefit of the new
                 techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Continuous cloud-scale query optimization in Microsoft Scope.
%%% PVLDB 6(11), pp. 961--972, August 2013 (first entry of issue 11 in
%%% this chunk; note the bibdate changes from 05:56:50 to 05:56:54).
@Article{Bruno:2013:CCS,
  author =       "Nicolas Bruno and Sapna Jain and Jingren Zhou",
  title =        "Continuous cloud-scale query optimization and
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "961--972",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Massive data analysis in cloud-scale data centers
                 plays a crucial role in making critical business
                 decisions. High-level scripting languages free
                 developers from understanding various system
                 trade-offs, but introduce new challenges for query
                 optimization. One key optimization challenge is missing
                 accurate data statistics, typically due to massive data
                 volumes and their distributed nature, complex
                 computation logic, and frequent usage of user-defined
                 functions. In this paper we propose novel techniques to
                 adapt query processing in the Scope system, the
                 cloud-scale computation environment in Microsoft Online
                 Services. We continuously monitor query execution,
                 collect actual runtime statistics, and adapt parallel
                 execution plans as the query executes. We discuss
                 similarities and differences between our approach and
                 alternatives proposed in the context of traditional
                 centralized systems. Experiments on large-scale Scope
                 production clusters show that the proposed techniques
                 systematically solve the challenge of
                 missing/inaccurate data statistics, detect and resolve
                 partition skew and plan structure, and improve query
                 latency by a few folds for real workloads. Although we
                 focus on optimizing high-level languages, the same
                 ideas are also applicable for MapReduce systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Optimization strategies for A/B testing on Hadoop (eBay pilot).
%%% PVLDB 6(11), pp. 973--984, August 2013.
%%% NOTE(review): the all-caps "{HADOOP}" in the title mirrors the
%%% published paper's capitalization; the braces protect it from style
%%% recasing.  The "233\% reduction in execution time" figure is quoted
%%% verbatim from the paper's abstract.
@Article{Cherniak:2013:OSB,
  author =       "Andrii Cherniak and Huma Zaidi and Vladimir
                 Zadorozhny",
  title =        "Optimization strategies for {A\slash B} testing on
                 {HADOOP}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "973--984",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this work, we present a set of techniques that
                 considerably improve the performance of executing
                 concurrent MapReduce jobs. Our proposed solution relies
                 on proper resource allocation for concurrent Hive jobs
                 based on data dependency, inter-query optimization and
                 modeling of Hadoop cluster load. To the best of our
                 knowledge, this is the first work towards
                 Hive/MapReduce job optimization which takes Hadoop
                 cluster load into consideration. We perform an
                 experimental study that demonstrates 233\% reduction in
                 execution time for concurrent vs sequential execution
                 schema. We report up to 40\% extra reduction in
                 execution time for concurrent job execution after
                 resource usage optimization. The results reported in
                 this paper were obtained in a pilot project to assess
                 the feasibility of migrating A/B testing from Teradata
                 + SAS analytics infrastructure to Hadoop. This work was
                 performed on eBay production Hadoop cluster.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Piranha: optimizing short jobs on Hadoop (Yahoo! workloads).
%%% PVLDB 6(11), pp. 985--996, August 2013.
%%% NOTE(review): corrected the misspelling "latency-sensistive" to
%%% "latency-sensitive" in the abstract; verify against the published
%%% PVLDB abstract.
@Article{Elmeleegy:2013:POS,
  author =       "Khaled Elmeleegy",
  title =        "{Piranha}: optimizing short jobs in {Hadoop}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "985--996",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Cluster computing has emerged as a key parallel
                 processing platform for large scale data. All major
                 internet companies use it as their major central
                 processing platform. One of cluster computing's most
                 popular examples is MapReduce and its open source
                 implementation Hadoop. These systems were originally
                 designed for batch and massive-scale computations.
                 Interestingly, over time their production workloads
                 have evolved into a mix of a small fraction of large
                 and long-running jobs and a much bigger fraction of
                 short jobs. This came about because these systems end
                 up being used as data warehouses, which store most of
                 the data sets and attract ad hoc, short, data-mining
                 queries. Moreover, the availability of higher level
                 query languages that operate on top of these cluster
                 systems proliferated these ad hoc queries. Since
                 existing systems were not designed for short,
                 latency-sensitive jobs, short interactive jobs suffer
                 from poor response times. In this paper, we present
                 Piranha--a system for optimizing short jobs on Hadoop
                 without affecting the larger jobs. It runs on existing
                 unmodified Hadoop clusters facilitating its adoption.
                 Piranha exploits characteristics of short jobs learned
                 from production workloads at Yahoo! clusters to reduce
                 the latency of such jobs. To demonstrate Piranha's
                 effectiveness, we evaluated its performance using three
                 realistic short queries. Piranha was able to reduce the
                 queries' response times by up to 71\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% SSD-resident indirection layer for index maintenance in multiversion
%%% databases (GiST/PostgreSQL prototype).  PVLDB 6(11),
%%% pp. 997--1008, August 2013.
@Article{Sadoghi:2013:MUD,
  author =       "Mohammad Sadoghi and Kenneth A. Ross and Mustafa Canim
                 and Bishwaranjan Bhattacharjee",
  title =        "Making updates disk-{I/O} friendly using {SSDs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "997--1008",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multiversion databases store both current and
                 historical data. Rows are typically annotated with
                 timestamps representing the period when the row is/was
                 valid. We develop novel techniques for reducing index
                 maintenance in multiversion databases, so that indexes
                 can be used effectively for analytical queries over
                 current data without being a heavy burden on
                 transaction throughput. To achieve this end, we
                 re-design persistent index data structures in the
                 storage hierarchy to employ an extra level of
                 indirection. The indirection level is stored on solid
                 state disks that can support very fast random I/Os, so
                 that traversing the extra level of indirection incurs a
                 relatively small overhead. The extra level of
                 indirection dramatically reduces the number of magnetic
                 disk I/Os that are needed for index updates, and
                 localizes maintenance to indexes on updated attributes.
                 Further, we batch insertions within the indirection
                 layer in order to reduce physical disk I/Os for
                 indexing new records. By reducing the index maintenance
                 overhead on transactions, we enable operational data
                 stores to create more indexes to support queries. We
                 have developed a prototype of our indirection proposal
                 by extending the widely used Generalized Search Tree
                 (GiST) open-source project, which is also employed in
                 PostgreSQL. Our working implementation demonstrates
                 that we can significantly reduce index maintenance
                 and/or query processing cost, by a factor of 3. For
                 insertions of new records, our novel batching technique
                 can save up to 90\% of the insertion time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Hadoop-GIS: spatial data warehousing over MapReduce/Hive.
%%% PVLDB 6(11), pp. 1009--1020, August 2013.
%%% NOTE(review): "available as a set of library for processing" in the
%%% abstract is quoted as published; left unchanged pending a check
%%% against the PVLDB original.
@Article{Aji:2013:HGH,
  author =       "Ablimit Aji and Fusheng Wang and Hoang Vo and Rubao
                 Lee and Qiaoling Liu and Xiaodong Zhang and Joel
                 Saltz",
  title =        "{Hadoop GIS}: a high performance spatial data
                 warehousing system over {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1009--1020",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Support of high performance queries on large volumes
                 of spatial data becomes increasingly important in many
                 application domains, including geospatial problems in
                 numerous fields, location based services, and emerging
                 scientific applications that are increasingly data- and
                 compute-intensive. The emergence of massive scale
                 spatial data is due to the proliferation of cost
                 effective and ubiquitous positioning technologies,
                 development of high resolution imaging technologies,
                 and contribution from a large number of community
                 users. There are two major challenges for managing and
                 querying massive spatial data to support spatial
                 queries: the explosion of spatial data, and the high
                 computational complexity of spatial queries. In this
                 paper, we present Hadoop-GIS --- a scalable and high
                 performance spatial data warehousing system for running
                 large scale spatial queries on Hadoop. Hadoop-GIS
                 supports multiple types of spatial queries on MapReduce
                 through spatial partitioning, customizable spatial
                 query engine RESQUE, implicit parallel spatial query
                 execution on MapReduce, and effective methods for
                 amending query results through handling boundary
                 objects. Hadoop-GIS utilizes global partition indexing
                 and customizable on demand local spatial indexing to
                 achieve efficient query processing. Hadoop-GIS is
                 integrated into Hive to support declarative spatial
                 queries with an integrated architecture. Our
                 experiments have demonstrated the high efficiency of
                 Hadoop-GIS on query response and high scalability to
                 run on commodity clusters. Our comparative experiments
                 have showed that performance of Hadoop-GIS is on par
                 with parallel SDBMS and outperforms SDBMS for
                 compute-intensive queries. Hadoop-GIS is available as a
                 set of library for processing spatial queries, and as
                 an integrated software package in Hive.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bamba:2013:SCO,
  author =       "Bhuvan Bamba and Siva Ravada and Ying Hu and Richard
                 Anderson",
  title =        "Statistics collection in {Oracle Spatial and Graph}:
                 fast histogram construction for complex geometry
                 objects",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1021--1032",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Oracle Spatial and Graph is a geographic information
                 system (GIS) which provides users the ability to store
                 spatial data alongside conventional data in Oracle. As
                 a result of the coexistence of spatial and other data,
                 we observe a trend towards users performing
                 increasingly complex queries which involve spatial as
                 well as non-spatial predicates. Accurate selectivity
                 values, especially for queries with multiple predicates
                 requiring joins among numerous tables, are essential
                 for the database optimizer to determine a good
                 execution plan. For queries involving spatial
                 predicates, this requires that reasonably accurate
                 statistics collection has been performed on the spatial
                 data. For extensible data cartridges such as Oracle
                 Spatial and Graph, the optimizer expects to receive
                 accurate predicate selectivity and cost values from
                 functions implemented within the data cartridge.
                 Although statistics collection for spatial data has
                 been researched in academia for a few years; to the
                 best of our knowledge, this is the first work to
                 present spatial statistics collection implementation
                 details for a commercial GIS database. In this paper,
                 we describe our experiences with implementation of
                 statistics collection methods for complex geometry
                 objects within Oracle Spatial and Graph. Firstly, we
                 exemplify issues with previous partitioning-based
                 algorithms in presence of complex geometry objects and
                 suggest enhancements which resolve the issues.
                 Secondly, we propose a main memory implementation which
                 not only speeds up the disk-based partitioning
                 algorithms but also utilizes existing R-tree indexes to
                 provide surprisingly accurate selectivity estimates.
                 Last but not the least, we provide extensive
                 experimental results and an example study which
                 displays the efficacy of our approach on Oracle query
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Akidau:2013:MFT,
  author =       "Tyler Akidau and Alex Balikov and Kaya Bekiroglu and
                 Slava Chernyak and Josh Haberman and Reuven Lax and Sam
                 McVeety and Daniel Mills and Paul Nordstrom and Sam
                 Whittle",
  title =        "{MillWheel}: fault-tolerant stream processing at
                 {Internet} scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1033--1044",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MillWheel is a framework for building low-latency
                 data-processing applications that is widely used at
                 Google. Users specify a directed computation graph and
                 application code for individual nodes, and the system
                 manages persistent state and the continuous flow of
                 records, all within the envelope of the framework's
                 fault-tolerance guarantees. This paper describes
                 MillWheel's programming model as well as its
                 implementation. The case study of a continuous anomaly
                 detector in use at Google serves to motivate how many
                 of MillWheel's features are used. MillWheel's
                 programming model provides a notion of logical time,
                 making it simple to write time-based aggregations.
                 MillWheel was designed from the outset with fault
                 tolerance and scalability in mind. In practice, we find
                 that MillWheel's unique combination of scalability,
                 fault tolerance, and a versatile programming model
                 lends itself to a wide variety of problems at Google.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rae:2013:OAS,
  author =       "Ian Rae and Eric Rollins and Jeff Shute and Sukhdeep
                 Sodhi and Radek Vingralek",
  title =        "Online, asynchronous schema change in {F1}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1045--1056",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce a protocol for schema evolution in a
                 globally distributed database management system with
                 shared data, stateless servers, and no global
                  membership. Our protocol is asynchronous---it allows
                  different servers in the database system to transition
                  to a new schema at different times---and online---all
                 servers can access and update all data during a schema
                 change. We provide a formal model for determining the
                 correctness of schema changes under these conditions,
                 and we demonstrate that many common schema changes can
                 cause anomalies and database corruption. We avoid these
                 problems by replacing corruption-causing schema changes
                 with a sequence of schema changes that is guaranteed to
                 avoid corrupting the database so long as all servers
                 are no more than one schema version behind at any time.
                 Finally, we discuss a practical implementation of our
                 protocol in F1, the database management system that
                 stores data for Google AdWords.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abraham:2013:SDD,
  author =       "Lior Abraham and John Allen and Oleksandr Barykin and
                 Vinayak Borkar and Bhuwan Chopra and Ciprian Gerea and
                 Daniel Merl and Josh Metzler and David Reiss and Subbu
                 Subramanian and Janet L. Wiener and Okay Zed",
  title =        "{Scuba}: diving into data at {Facebook}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1057--1067",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Facebook takes performance monitoring seriously.
                 Performance issues can impact over one billion users so
                 we track thousands of servers, hundreds of PB of daily
                 network traffic, hundreds of daily code changes, and
                 many other metrics. We require latencies of under a
                  minute from events occurring (a client request on a
                 phone, a bug report filed, a code change checked in) to
                 graphs showing those events on developers' monitors.
                 Scuba is the data management system Facebook uses for
                 most real-time analysis. Scuba is a fast, scalable,
                 distributed, in-memory database built at Facebook. It
                 currently ingests millions of rows (events) per second
                 and expires data at the same rate. Scuba stores data
                 completely in memory on hundreds of servers each with
                 144 GB RAM. To process each query, Scuba aggregates
                 data from all servers. Scuba processes almost a million
                 queries per day. Scuba is used extensively for
                 interactive, ad hoc, analysis queries that run in under
                 a second over live data. In addition, Scuba is the
                 workhorse behind Facebook's code regression analysis,
                 bug report monitoring, ads revenue monitoring, and
                 performance debugging.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shute:2013:FDS,
  author =       "Jeff Shute and Radek Vingralek and Bart Samwel and Ben
                 Handy and Chad Whipkey and Eric Rollins and Mircea
                 Oancea and Kyle Littlefield and David Menestrina and
                 Stephan Ellner and John Cieslewicz and Ian Rae and
                 Traian Stancescu and Himani Apte",
  title =        "{F1}: a distributed {SQL} database that scales",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1068--1079",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "F1 is a distributed relational database system built
                 at Google to support the AdWords business. F1 is a
                 hybrid database that combines high availability, the
                 scalability of NoSQL systems like Bigtable, and the
                 consistency and usability of traditional SQL databases.
                 F1 is built on Spanner, which provides synchronous
                 cross-datacenter replication and strong consistency.
                 Synchronous replication implies higher commit latency,
                 but we mitigate that latency by using a hierarchical
                 schema model with structured data types and through
                 smart application design. F1 also includes a fully
                 functional distributed SQL query engine and automatic
                 change tracking and publishing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Raman:2013:DBA,
  author =       "Vijayshankar Raman and Gopi Attaluri and Ronald Barber
                 and Naresh Chainani and David Kalmuk and Vincent
                 KulandaiSamy and Jens Leenstra and Sam Lightstone and
                 Shaorong Liu and Guy M. Lohman and Tim Malkemus and
                 Rene Mueller and Ippokratis Pandis and Berni Schiefer
                 and David Sharpe and Richard Sidle and Adam Storm and
                 Liping Zhang",
  title =        "{DB2} with {BLU} acceleration: so much more than just
                 a column store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1080--1091",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "DB2 with BLU Acceleration deeply integrates innovative
                 new techniques for defining and processing
                 column-organized tables that speed read-mostly Business
                 Intelligence queries by 10 to 50 times and improve
                 compression by 3 to 10 times, compared to traditional
                 row-organized tables, without the complexity of
                 defining indexes or materialized views on those tables.
                 But DB2 BLU is much more than just a column store.
                 Exploiting frequency-based dictionary compression and
                 main-memory query processing technology from the Blink
                 project at IBM Research --- Almaden, DB2 BLU performs
                 most SQL operations --- predicate application (even
                 range predicates and IN-lists), joins, and grouping ---
                 on the compressed values, which can be packed
                 bit-aligned so densely that multiple values fit in a
                 register and can be processed simultaneously via SIMD
                  (single-instruction, multiple-data) instructions.
                 Designed and built from the ground up to exploit modern
                 multi-core processors, DB2 BLU's hardware-conscious
                 algorithms are carefully engineered to maximize
                 parallelism by using novel data structures that need
                 little latching, and to minimize data-cache and
                 instruction-cache misses. Though DB2 BLU is optimized
                 for in-memory processing, database size is not limited
                 by the size of main memory. Fine-grained synopses, late
                 materialization, and a new probabilistic buffer pool
                 protocol for scans minimize disk I/Os, while aggressive
                 prefetching reduces I/O stalls. Full integration with
                 DB2 ensures that DB2 with BLU Acceleration benefits
                 from the full functionality and robust utilities of a
                 mature product, while still enjoying order-of-magnitude
                 performance gains from revolutionary technology without
                 even having to change the SQL, and can mix
                 column-organized and row-organized tables in the same
                 tablespace and even within the same query.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ovsiannikov:2013:QFS,
  author =       "Michael Ovsiannikov and Silvius Rus and Damian Reeves
                 and Paul Sutter and Sriram Rao and Jim Kelly",
  title =        "The {Quantcast File System}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1092--1101",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Quantcast File System (QFS) is an efficient
                 alternative to the Hadoop Distributed File System
                 (HDFS). QFS is written in C++, is plugin compatible
                 with Hadoop MapReduce, and offers several efficiency
                 improvements relative to HDFS: 50\% disk space savings
                 through erasure coding instead of replication, a
                 resulting doubling of write throughput, a faster name
                 node, support for faster sorting and logging through a
                 concurrent append feature, a native command line client
                 much faster than hadoop fs, and global
                 feedback-directed I/O device management. As QFS works
                 out of the box with Hadoop, migrating data from HDFS to
                 QFS involves simply executing hadoop distcp. QFS is
                 being developed fully open source and is available
                 under an Apache license from
                 https://github.com/quantcast/qfs. Multi-petabyte QFS
                 instances have been in heavy production use since
                 2011.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bellamkonda:2013:ABD,
  author =       "Srikanth Bellamkonda and Hua-Gang Li and Unmesh Jagtap
                 and Yali Zhu and Vince Liang and Thierry Cruanes",
  title =        "Adaptive and big data scale parallel execution in
                 {Oracle}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1102--1113",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper showcases some of the newly introduced
                 parallel execution methods in Oracle RDBMS. These
                 methods provide highly scalable and adaptive evaluation
                 for the most commonly used SQL operations --- joins,
                 group-by, rollup/cube, grouping sets, and window
                 functions. The novelty of these techniques is their use
                 of multi-stage parallelization models, accommodation of
                 optimizer mistakes, and the runtime parallelization and
                 data distribution decisions. These parallel plans adapt
                 based on the statistics gathered on the real data at
                 query execution time. We realized enormous performance
                 gains from these adaptive parallelization techniques.
                 The paper also discusses our approach to parallelize
                 queries with operations that are inherently serial. We
                 believe all these techniques will make their way into
                 big data analytics and other massively parallel
                 database systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bellare:2013:WSM,
  author =       "Kedar Bellare and Carlo Curino and Ashwin
                  Machanavajjhala and Peter Mika and Mandar Rahurkar and
                 Aamod Sane",
  title =        "{WOO}: a scalable and multi-tenant platform for
                 continuous knowledge base synthesis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1114--1125",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Search, exploration and social experience on the Web
                 has recently undergone tremendous changes with search
                 engines, web portals and social networks offering a
                 different perspective on information discovery and
                 consumption. This new perspective is aimed at capturing
                 user intents, and providing richer and highly connected
                 experiences. The new battleground revolves around
                 technologies for the ingestion, disambiguation and
                 enrichment of entities from a variety of structured and
                 unstructured data sources --- we refer to this process
                 as knowledge base synthesis. This paper presents the
                 design, implementation and production deployment of the
                 Web Of Objects (WOO) system, a Hadoop-based platform
                 tackling such challenges. WOO has been designed and
                 implemented to enable various products in Yahoo! to
                 synthesize knowledge bases (KBs) of entities relevant
                 to their domains. Currently, the implementation of WOO
                 we describe is used by various Yahoo! properties such
                  as IntoNow, Yahoo! Local, Yahoo! Events and Yahoo!
                 Search. This paper highlights: (i) challenges that
                 arise in designing, building and operating a platform
                 that handles multi-domain, multi-version, and
                 multi-tenant disambiguation of web-scale knowledge
                 bases (hundreds of millions of entities), (ii) the
                 architecture and technical solutions we devised, and
                 (iii) an evaluation on real-world production
                 datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gattani:2013:EEL,
  author =       "Abhishek Gattani and Digvijay S. Lamba and Nikesh
                 Garera and Mitul Tiwari and Xiaoyong Chai and Sanjib
                 Das and Sri Subramaniam and Anand Rajaraman and Venky
                 Harinarayan and AnHai Doan",
  title =        "Entity extraction, linking, classification, and
                 tagging for social media: a {Wikipedia}-based
                 approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1126--1137",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many applications that process social data, such as
                 tweets, must extract entities from tweets (e.g.,
                 ``Obama'' and ``Hawaii'' in ``Obama went to Hawaii''),
                 link them to entities in a knowledge base (e.g.,
                 Wikipedia), classify tweets into a set of predefined
                 topics, and assign descriptive tags to tweets. Few
                 solutions exist today to solve these problems for
                 social data, and they are limited in important ways.
                 Further, even though several industrial systems such as
                 OpenCalais have been deployed to solve these problems
                 for text data, little if any has been published about
                 them, and it is unclear if any of the systems has been
                 tailored for social media. In this paper we describe in
                 depth an end-to-end industrial system that solves these
                 problems for social data. The system has been developed
                 and used heavily in the past three years, first at
                 Kosmix, a startup, and later at WalmartLabs. We show
                 how our system uses a Wikipedia-based global
                 ``real-time'' knowledge base that is well suited for
                 social data, how we interleave the tasks in a
                 synergistic fashion, how we generate and use contexts
                 and social signals to improve task accuracy, and how we
                 scale the system to the entire Twitter firehose. We
                 describe experiments that show that our system
                 outperforms current approaches. Finally we describe
                 applications of the system at Kosmix and WalmartLabs,
                 and lessons learned.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elmeleegy:2013:OTD,
  author =       "Hazem Elmeleegy and Yinan Li and Yan Qi and Peter
                 Wilmot and Mingxi Wu and Santanu Kolay and Ali Dasdan
                 and Songting Chen",
  title =        "Overview of turn data management platform for digital
                 advertising",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1138--1149",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper gives an overview of Turn Data Management
                 Platform (DMP). We explain the purpose of this type of
                 platforms, and show how it is positioned in the current
                 digital advertising ecosystem. We also provide a
                 detailed description of the key components in Turn DMP.
                 These components cover the functions of (1) data
                 ingestion and integration, (2) data warehousing and
                 analytics, and (3) real-time data activation. For all
                 components, we discuss the main technical and research
                 challenges, as well as the alternative design choices.
                 One of the main goals of this paper is to highlight the
                 central role that data management is playing in shaping
                 this fast growing multi-billion dollars industry.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Curtiss:2013:USS,
  author =       "Michael Curtiss and Iain Becker and Tudor Bosman and
                 Sergey Doroshenko and Lucian Grijincu and Tom Jackson
                 and Sandhya Kunnatur and Soren Lassen and Philip Pronin
                 and Sriram Sankar and Guanghao Shen and Gintaras Woss
                 and Chao Yang and Ning Zhang",
  title =        "{Unicorn}: a system for searching the social graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1150--1161",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Unicorn is an online, in-memory social graph-aware
                 indexing system designed to search trillions of edges
                 between tens of billions of users and entities on
                 thousands of commodity servers. Unicorn is based on
                 standard concepts in information retrieval, but it
                 includes features to promote results with good social
                 proximity. It also supports queries that require
                 multiple round-trips to leaves in order to retrieve
                 objects that are more than one edge away from source
                 nodes. Unicorn is designed to answer billions of
                 queries per day at latencies in the hundreds of
                 milliseconds, and it serves as an infrastructural
                 building block for Facebook's Graph Search product. In
                 this paper, we describe the data model and query
                 language supported by Unicorn. We also describe its
                 evolution as it became the primary backend for
                 Facebook's search offerings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ramazzina:2013:NSC,
  author =       "Sergio Ramazzina and Chiara L. Ballari and Daniela
                 Somenzi",
  title =        "A new service for customer care based on the
                 {Trentorise} bigdata platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1162--1163",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we give an overview of a platform
                 implemented in collaboration with the University of
                 Trento to deliver an innovative family of customer care
                 services.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Antonelli:2013:EDM,
  author =       "Fabrizio Antonelli and Antonino Casella and Cristiana
                 Chitic and Roberto Larcher and Giovanni Torrisi",
  title =        "Exploiting the diversity, mass and speed of
                 territorial data by {TELCO Operator} for better user
                 services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1164--1165",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bedini:2013:TBD,
  author =       "Ivan Bedini and Benedikt Elser and Yannis Velegrakis",
  title =        "The {Trento} big data platform for public
                 administration and large companies: use cases and
                 opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1166--1167",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tran:2013:DQO,
  author =       "Nga Tran and Sreenath Bodagala and Jaimin Dave",
  title =        "Designing query optimizers for big data problems of
                 the future",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1168--1169",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Vertica SQL Query Optimizer was written from the
                 ground up for the Vertica Analytic Database. Its
                 design, and the tradeoffs we encountered during
                 implementation, support the case that the full power of
                 novel database systems can be realized only with a
                 custom Query Optimizer, carefully crafted exclusively
                 for the system in which it operates.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Franceschini:2013:HMV,
  author =       "Monica Franceschini",
  title =        "How to maximize the value of big data with the open
                 source {SpagoBI} suite through a comprehensive
                 approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1170--1171",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper describes the approach adopted by SpagoBI
                 suite (\path=www.spagobi.org=) to manage large volumes
                 of heterogeneous structured and unstructured data, to
                 perform real-time Business Intelligence on Big Data
                 streaming and to give meaning to data through the
                 semantic analysis. SpagoBI supplies meaningful data
                 insights through the main concept of persistable and
                 schedulable datasets, and using tools such as
                 self-service BI, ad-hoc reporting, interactive
                 dashboards and explorative analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chang:2013:CAC,
  author =       "Edward Y. Chang",
  title =        "Context-aware computing: opportunities and open
                 issues",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1172--1173",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A 2011 Gartner report [3] describes context-aware
                 computing as a game-changing opportunity for
                 enterprises to improve both productivity and profits.
                 Context-aware computing is about making applications
                 and content more relevant to a user's context, e.g.,
                 when and where the user is, thereby improving user
                 experience. For instance, a coupon delivered to a user
                 at a wrong time or at a wrong location is considered a
                 nuisance. On the contrary, receiving a timely, usable
                 coupon before purchasing a merchandise is a treat.
                 Context-aware computing is not a new concept, but the
                 ongoing mobile revolution makes it both necessary and
                 feasible. (1) Necessary because the mobile phone display
                 is small and information must be delivered with much
                 higher relevance and precision to meet user needs. (2)
                 Feasible because small, light-weight mobile devices
                 allow users to almost always carry them around, and
                 much can be learned via a phone about its user's habits
                 and states. Context-aware computing involves first
                 acquiring context and then taking context-dependent
                 actions. For instance, a phone can sense a user's
                 location and turn off its GPS unit to conserve power
                 when the user enters a building, or it can collect EKG
                 signals of a user and trigger an alert if the user's
                 heart beats irregularly. Similarly, a restaurant can
                 send a coupon to a user when that user is queued up in
                 front of a nearby restaurant. The useful context can be
                 divided into three categories: information on the user
                 (knowledge of habits, emotional state, biophysiological
                 conditions), the user's environment (time, location,
                 co-location of others, social interaction), and the
                 user's tasks (transportation mode, engaged tasks,
                 general goals) [4]. Context-aware computing can be
                 applied to benefit applications in many areas including
                 but not limited to information retrieval, facility
                 management, productivity enhancement, in addition to
                 the aforementioned three examples representing power
                 management, health care, and commerce, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hassanzadeh:2013:NGD,
  author =       "Oktie Hassanzadeh and Anastasios Kementsietsidis and
                 Benny Kimelfeld and Rajasekar Krishnamurthy and Fatma
                 {\"O}zcan and Ippokratis Pandis",
  title =        "Next generation data analytics at {IBM} research",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1174--1175",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Brunato:2013:LIO,
  author =       "Mauro Brunato and Roberto Battiti",
  title =        "Learning and intelligent optimization {(LION)}: one
                 ring to rule them all",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1176--1177",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Almost by definition, optimization is a source of a
                 tremendous power for automatically improving processes,
                 decisions, products and services. But its potential is
                 still largely unexploited in most real-world contexts.
                 One of the main reasons blocking its widespread
                 adoption is that standard optimization assumes the
                 existence of a function $ f(x) $ to be minimized, while
                 in most real-world business contexts this function does
                 not exist or is extremely difficult and costly to build
                 by hand. Machine learning (ML) comes to the rescue: the
                 function (the model) can be built by machine learning
                 starting from abundant data. By Learning and
                 Intelligent Optimization (LION) we mean this
                 combination of learning from data and optimization
                 which can be applied to complex, dynamic, stochastic
                 contexts. This combination dramatically increases the
                 automation level and puts more power directly in the
                 hands of decision makers without resorting to
                 intermediate layers of data scientists (LION has a huge
                 potential for a self-service usage). Reaching this goal
                 is a huge challenge and it will require research at the
                 boundary between two areas, machine learning and
                 optimization, which have been traditionally
                 separated.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lomet:2013:MSS,
  author =       "David Lomet",
  title =        "{Microsoft SQL} server's integrated database approach
                 for modern applications and hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1178--1179",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, there has been much renewed interest in
                 re-architecting database systems to exploit new
                 hardware. While some efforts have suggested that one
                 needs specialized engines (``one size does not fit
                 all''), the approach pursued by Microsoft's SQL Server
                 has been to integrate multiple elements into a common
                 architecture. This brings customers what they want by
                 reducing data impedance mismatches between database
                 systems that they are using for multiple purposes. This
                 integration is, of course, more easily said than done.
                 But this is, in fact, precisely what the SQL Server
                 team has done.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hacigumus:2013:OMS,
  author =       "Hakan Hac{\'\i}g{\"u}m{\"u}s and Jagan
                 Sankaranarayanan and Junichi Tatemura and Jeff LeFevre
                 and Neoklis Polyzotis",
  title =        "{Odyssey}: a multistore system for evolutionary
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1180--1181",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bouquet:2013:GEN,
  author =       "Paolo Bouquet and Andrea Molinari",
  title =        "A global {Entity Name System (ENS)} for data
                 ecosystems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1182--1183",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "After decades of schema-centric research on data
                 management and integration, the evolution of data on
                 the web and the adoption of resource-based models seem
                 to have shifted the focus towards an entity-centric
                 approach. Our thesis is that the missing element to
                 achieve the full potential of this approach is the
                 development of what we call an Entity Name System
                 (ENS), namely a system which provides a collection of
                 general services for managing the lifecycle of globally
                 unique identifiers in an open and decentralized
                 environment. The claim is that this system can indeed
                 play the coordination role that the DNS played for the
                 document-centric development of the current web.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sikka:2013:SHE,
  author =       "Vishal Sikka and Franz F{\"a}rber and Anil Goel and
                 Wolfgang Lehner",
  title =        "{SAP HANA}: the evolution from a modern main-memory
                 data platform to an enterprise application platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1184--1185",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SAP HANA is a pioneering, and one of the best
                 performing, data platform designed from the grounds up
                 to heavily exploit modern hardware capabilities,
                 including SIMD, and large memory and CPU footprints. As
                 a comprehensive data management solution, SAP HANA
                 supports the complete data life cycle encompassing
                 modeling, provisioning, and consumption. This extended
                 abstract outlines the vision and planned next step of
                 the SAP HANA evolution growing from a core data
                 platform into an innovative enterprise application
                 platform as the foundation for current as well as novel
                 business applications in both on-premise and on-demand
                 scenarios. We argue that only a holistic system design
                 rigorously applying co-design at different levels may
                 yield a highly optimized and sustainable platform for
                 modern enterprise applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nambiar:2013:KTR,
  author =       "Raghunath Nambiar and Meikel Poess",
  title =        "Keeping the {TPC} relevant!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1186--1187",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Transaction Processing Performance Council (TPC)
                 is a nonprofit organization founded in 1988 to define
                 transaction processing and database benchmarks. Since
                 then, the TPC has played a crucial role in providing
                 the industry with relevant standards for total system
                 performance, price-performance, and energy-efficiency
                 comparisons. TPC benchmarks are widely used by database
                 researchers and academia. Historically known for
                 database-centric standards, the TPC has developed a
                 benchmark for virtualization and is currently
                 developing a multisource data integration benchmark.
                 The technology landscape is changing at a rapid pace,
                 challenging industry experts and researchers to develop
                 innovative techniques for evaluating, measuring, and
                 characterizing the performance of modern application
                 systems. The Technology Conference series on
                 Performance Evaluation and Benchmarking (TPCTC),
                 introduced in 2009, and the new TPC-Express initiatives
                 are steps taken by the TPC to be relevant in the coming
                 years and beyond.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2013:BDI,
  author =       "Xin Luna Dong and Divesh Srivastava",
  title =        "Big data integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1188--1189",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Big Data era is upon us: data is being generated,
                 collected and analyzed at an unprecedented scale, and
                 data-driven decision making is sweeping through
                 society. Since the value of data explodes when it can
                 be linked and fused with other data, addressing the big
                 data integration (BDI) challenge is critical to
                 realizing the promise of Big Data. BDI differs from
                 traditional data integration in many dimensions: (i)
                 the number of data sources, even for a single domain,
                 has grown to be in the tens of thousands, (ii) many of
                 the data sources are very dynamic, as a huge amount of
                 newly collected data are continuously made available,
                 (iii) the data sources are extremely heterogeneous in
                 their structure, with considerable variety even for
                 substantially similar entities, and (iv) the data
                 sources are of widely differing qualities, with
                 significant differences in the coverage, accuracy and
                 timeliness of data provided. This tutorial explores the
                 progress that has been made by the data integration
                 community on the topics of schema mapping, record
                 linkage and data fusion in addressing these novel
                 challenges faced by big data integration, and
                 identifies a range of open problems for the
                 community.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Viglas:2013:JTC,
  author =       "Stratis D. Viglas",
  title =        "Just-in-time compilation for {SQL} query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1190--1191",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Just-in-time compilation of SQL queries into native
                 code has recently emerged as a viable alternative to
                 interpretation-based query processing. We present the
                 salient results of research in this fresh area,
                 addressing all aspects of the query processing stack.
                 Throughout the discussion we draw analogies to the
                 general code generation techniques used in contemporary
                 compiler technology. At the same time we describe the
                 open research problems of the area.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ailamaki:2013:TST,
  author =       "Anastasia Ailamaki and Ryan Johnson and Ippokratis
                 Pandis and P{\'\i}nar T{\"o}z{\"u}n",
  title =        "Toward scalable transaction processing: evolution of
                 {Shore-MT}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1192--1193",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Designing scalable transaction processing systems on
                 modern multicore hardware has been a challenge for
                 almost a decade. The typical characteristics of
                 transaction processing workloads lead to a high degree
                 of unbounded communication on multicores for
                 conventional system designs. In this tutorial, we
                 initially present a systematic way of eliminating
                 scalability bottlenecks of a transaction processing
                 system, which is based on minimizing the unbounded
                 communication. Then, we show several techniques that
                 apply the presented methodology to minimize logging,
                 locking, latching etc. related bottlenecks of
                 transaction processing systems. In parallel, we
                 demonstrate the internals of the Shore-MT storage
                 manager and how they have evolved over the years in
                 terms of scalability on multicore hardware through such
                 techniques. We also teach how to use Shore-MT with the
                 various design options it offers through its
                 application layer Shore-Kits and Metadata Frontend.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elmore:2013:TDV,
  author =       "Aaron J. Elmore and Carlo Curino and Divyakant Agrawal
                 and Amr {El Abbadi}",
  title =        "Towards database virtualization for database as a
                 service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1194--1195",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Advances in operating system and storage-level
                 virtualization technologies have enabled the effective
                 consolidation of heterogeneous applications in a shared
                 cloud infrastructure. Novel research challenges arising
                 from this new shared environment include load
                 balancing, workload estimation, resource isolation,
                 machine replication, live migration, and an emergent
                 need of automation to handle large scale operations
                 with minimal manual intervention. Given that databases
                 are at the core of most applications that are deployed
                 in the cloud, database management systems (DBMSs)
                 represent a very important technology component that
                 needs to be virtualized in order to realize the
                 benefits of virtualization from autonomic management of
                 data-intensive applications in large scale
                 data-centers. The goal of this tutorial is to survey
                 the techniques used in providing elasticity in virtual
                 machine systems, shared storage systems, and survey
                 database research on multitenant architectures and
                 elasticity primitives. This foundation of core Database
                 as a Service advances, together with a primer of
                 important related topics in OS and storage-level
                 virtualization, are central for anyone that wants to
                 operate in this area of research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mokbel:2013:MSN,
  author =       "Mohamed F. Mokbel and Mohamed Sarwat",
  title =        "Mobility and social networking: a data management
                 perspective",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "11",
  pages =        "1196--1197",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:56:54 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This tutorial presents the state-of-the-art research
                 that lies at the intersection of two hot topics in the
                 data management community: (1) social networking and
                 (2) mobility. In this tutorial, we give an overview of
                 existing research work, systems, and applications
                 related to both social networking and mobility. In
                 addition, we introduce several resources (i.e.,
                 datasets, software tools) as well as a list of
                 promising research directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xue:2013:DSD,
  author =       "Andy Yuan Xue and Rui Zhang and Yu Zheng and Xing Xie
                 and Jianhui Yu and Yong Tang",
  title =        "{DesTeller}: a system for destination prediction based
                 on trajectories with privacy protection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1198--1201",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Destination prediction is an essential task for a
                 number of emerging location based applications such as
                 recommending sightseeing places and sending targeted
                 advertisements. A common approach to destination
                 prediction is to derive the probability of a location
                 being the destination based on historical trajectories.
                 However, existing techniques suffer from the ``data
                 sparsity problem'', i.e., the number of available
                 historical trajectories is far from sufficient to cover
                 all possible trajectories. This problem considerably
                 limits the amount of query trajectories whose predicted
                 destinations can be inferred. In this demonstration, we
                 showcase a system named ``DesTeller'' that is
                 interactive, user-friendly, publicly accessible, and
                 capable of answering real-time queries. The underlying
                 algorithm Sub-Trajectory Synthesis (SubSyn)
                 successfully addressed the data sparsity problem and is
                 able to predict destinations for almost every query
                 submitted by travellers. We also consider the privacy
                 protection issue in case an adversary uses SubSyn
                 algorithm to derive sensitive location information of
                 users.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2013:SPS,
  author =       "Zhe Chen and Michael Cafarella and Jun Chen and Daniel
                 Prevo and Junfeng Zhuang",
  title =        "{Senbazuru}: a prototype spreadsheet database
                 management system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1202--1205",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Spreadsheets have become a critical data management
                 tool, but they lack explicit relational metadata,
                 making it difficult to join or integrate data across
                 multiple spreadsheets. Because spreadsheet data are
                 widely available on a huge range of topics, a tool that
                 allows easy spreadsheet integration would be hugely
                 beneficial for a variety of users. We demonstrate that
                 Senbazuru, a prototype spreadsheet database management
                 system (SSDBMS), is able to extract relational
                 information from spreadsheets. By doing so, it opens up
                 opportunities for integration among spreadsheets and
                 with other relational sources. Senbazuru allows users
                 to search for relevant spreadsheets in a large corpus,
                 probabilistically constructs a relational version of
                 the data, and offers several relational operations over
                 the resulting extracted data (including joins to other
                 spreadsheet data). Our demonstration is available on
                 two clients: a JavaScript-rich Web site and a touch
                 interface on the iPad. During the demo, Senbazuru will
                 allow VLDB participants to search spreadsheets, extract
                 relational data from them, and apply relational
                 operators such as select and join.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Smits:2013:RFQ,
  author =       "Gr{\'e}gory Smits and Olivier Pivert and Thomas
                 Girault",
  title =        "{ReqFlex}: fuzzy queries for everyone",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1206--1209",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration we present a complete
                 fuzzy-set-based approach to preference queries that
                 tackles the two main questions raised by the
                 introduction of flexibility and personalization when
                 querying relational databases: (i) how to efficiently
                 execute preference queries? and, (ii) how to help users
                 define preferences and queries? As an answer to the
                 first question, we propose PostgreSQL\_f, a module
                 implemented on top of PostgreSQL to handle fuzzy
                 queries. To answer the second question, we propose
                 ReqFlex an intuitive user interface to the definition
                 of preferences and the construction of fuzzy queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kaufmann:2013:CIT,
  author =       "Martin Kaufmann and Panagiotis Vagenas and Peter M.
                 Fischer and Donald Kossmann and Franz F{\"a}rber",
  title =        "Comprehensive and interactive temporal query
                 processing with {SAP HANA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1210--1213",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demo, we present a prototype of a main memory
                 database system which provides a wide range of temporal
                 operators featuring predictable and interactive
                 response times. Much of real-life data is temporal in
                 nature, and there is an increasing application demand
                 for temporal models and operations in databases.
                 Nevertheless, SQL:2011 has only recently overcome a
                 decade-long standstill on standardizing temporal
                 features. As a result, few database systems provide any
                 temporal support, and even those only have limited
                 expressiveness and poor performance. Our prototype
                 combines an in-memory column store and a novel, generic
                 temporal index structure named Timeline Index. As we
                 will show on a workload based on real customer use
                 cases, it achieves predictable and interactive query
                 performance for a wide range of temporal query types
                 and data sizes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Grust:2013:FDT,
  author =       "Torsten Grust and Nils Schweinsberg and Alexander
                 Ulrich",
  title =        "Functions are data too: defunctionalization for
                 {PL\slash SQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1214--1217",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate a full-fledged implementation of
                 first-class functions for the widely used PL/SQL
                 database programming language. Functions are treated as
                 regular data items that may be (1) constructed at query
                 runtime, (2) stored in and retrieved from tables, (3)
                 assigned to variables, and (4) passed to and from other
                 (higher-order) functions. The resulting PL/SQL dialect
                 concisely and elegantly expresses a wide range of new
                 query idioms which would be cumbersome to formulate if
                 functions remained second-class citizens. We include a
                 diverse set of application scenarios that make these
                 advantages tangible. First-class PL/SQL functions
                 require featherweight syntactic extensions only and
                  come with a non-invasive implementation --- the
                  defunctionalization transformation --- that can entirely
                 be built on top of existing relational DBMS
                 infrastructure. An interactive demonstrator helps users
                 to experiment with the ``function as data'' paradigm
                 and to earn a solid intuition of its inner workings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ebaid:2013:NGD,
  author =       "Amr Ebaid and Ahmed Elmagarmid and Ihab F. Ilyas and
                 Mourad Ouzzani and Jorge-Arnulfo Quiane-Ruiz and Nan
                 Tang and Si Yin",
  title =        "{NADEEF}: a generalized data cleaning system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1218--1221",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present NADEEF, an extensible, generic and
                 easy-to-deploy data cleaning system. NADEEF
                 distinguishes between a programming interface and a
                 core to achieve generality and extensibility. The
                 programming interface allows users to specify data
                 quality rules by writing code that implements
                 predefined classes. These classes uniformly define what
                 is wrong with the data and (possibly) how to fix it. We
                 will demonstrate the following features provided by
                 NADEEF. (1) Heterogeneity: The programming interface
                 can be used to express many types of data quality rules
                 beyond the well known CFDs (FDs), MDs and ETL rules.
                 (2) Interdependency: The core algorithms can interleave
                 multiple types of rules to detect and repair data
                 errors. (3) Deployment and extensibility: Users can
                 easily customize NADEEF by defining new types of rules,
                 or by extending the core. (4) Metadata management and
                 data custodians: We show a live data quality dashboard
                 to effectively involve users in the data cleaning
                 process.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bergamaschi:2013:QKS,
  author =       "Sonia Bergamaschi and Francesco Guerra and Matteo
                 Interlandi and Raquel Trillo-Lado and Yannis
                 Velegrakis",
  title =        "{QUEST}: a keyword search system for relational data
                 based on semantic and machine learning techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1222--1225",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We showcase QUEST (QUEry generator for STructured
                 sources), a search engine for relational databases that
                 combines semantic and machine learning techniques for
                 transforming keyword queries into meaningful SQL
                 queries. The search engine relies on two approaches:
                 the forward, providing mappings of keywords into
                 database terms (names of tables and attributes, and
                 domains of attributes), and the backward, computing the
                 paths joining the data structures identified in the
                 forward step. The results provided by the two
                 approaches are combined within a probabilistic
                 framework based on the Dempster-Shafer Theory. We
                 demonstrate QUEST capabilities, and we show how, thanks
                 to the flexibility obtained by the probabilistic
                 combination of different techniques, QUEST is able to
                 compute high quality results even with few training
                 data and/or with hidden data sources such as those
                 found in the Deep Web.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bogh:2013:GNA,
  author =       "Kenneth S. B{\o}gh and Anders Skovsgaard and Christian
                 S. Jensen",
  title =        "{GroupFinder}: a new approach to top-$k$
                 point-of-interest group retrieval",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1226--1229",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The notion of point-of-interest (PoI) has existed
                 since paper road maps began to include markings of
                 useful places such as gas stations, hotels, and tourist
                 attractions. With the introduction of geopositioned
                 mobile devices such as smartphones and mapping services
                 such as Google Maps, the retrieval of PoIs relevant to
                  a user's intent has become a problem of automated
                 spatio-textual information retrieval. Over the last
                 several years, substantial research has gone into the
                 invention of functionality and efficient
                 implementations for retrieving nearby PoIs. However,
                 with a couple of exceptions existing proposals retrieve
                 results at single-PoI granularity. We assume that a
                 mobile device user issues queries consisting of
                 keywords and an automatically supplied geo-position,
                 and we target the common case where the user wishes to
                 find nearby groups of PoIs that are relevant to the
                 keywords. Such groups are relevant to users who wish to
                 conveniently explore several options before making a
                 decision such as to purchase a specific product.
                 Specifically, we demonstrate a practical proposal for
                  finding top-$k$ PoI groups in response to a query. We
                 show how problem parameter settings can be mapped to
                 options that are meaningful to users. Further, although
                 this kind of functionality is prone to combinatorial
                 explosion, we will demonstrate that the functionality
                 can be supported efficiently in practical settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eldawy:2013:DSE,
  author =       "Ahmed Eldawy and Mohamed F. Mokbel",
  title =        "A demonstration of {SpatialHadoop}: an efficient
                 {MapReduce} framework for spatial data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1230--1233",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This demo presents SpatialHadoop as the first
                 full-fledged MapReduce framework with native support
                 for spatial data. SpatialHadoop is a comprehensive
                 extension to Hadoop that pushes spatial data inside the
                 core functionality of Hadoop. SpatialHadoop runs
                 existing Hadoop programs as is, yet, it achieves
                 order(s) of magnitude better performance than Hadoop
                 when dealing with spatial data. SpatialHadoop employs a
                 simple spatial high level language, a two-level spatial
                 index structure, basic spatial components built inside
                 the MapReduce layer, and three basic spatial
                 operations: range queries, $k$-NN queries, and spatial
                 join. Other spatial operations can be similarly
                 deployed in SpatialHadoop. We demonstrate a real system
                 prototype of SpatialHadoop running on an Amazon EC2
                 cluster against two sets of real spatial data obtained
                 from Tiger Files and OpenStreetMap with sizes 60GB and
                 300GB, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abbasoglu:2013:APC,
  author =       "Mehmet Ali Abbasoglu and Bugra Gedik and Hakan
                 Ferhatosmanoglu",
  title =        "Aggregate profile clustering for telco analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1234--1237",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many telco analytics require maintaining call profiles
                 based on recent customer call patterns. Such call
                 profiles are typically organized as aggregations
                 computed at different time scales over the recent
                 customer interactions. Customer call profiles are key
                 inputs for analytics targeted at improving operations,
                 marketing, and sales of telco providers. Many of these
                 analytics require clustering customer call profiles, so
                 that customers with similar calling patterns can be
                 modeled as a group. Example applications include
                 optimizing tariffs, customer segmentation, and usage
                 forecasting. In this demo, we present our system for
                 scalable aggregate profile clustering in a streaming
                 setting. We focus on managing anonymized segments of
                 customers for tariff optimization. Due to the large
                 number of customers, maintaining profile clusters have
                 high processing and memory resource requirements. In
                 order to tackle this problem, we apply distributed
                 stream processing. However, in the presence of
                 distributed state, it is a major challenge to partition
                 the profiles over machines (nodes) such that memory and
                 computation balance is maintained, while keeping the
                 clustering accuracy high. Furthermore, to adapt to
                 potentially changing customer calling patterns, the
                 partitioning of profiles to machines should be
                 continuously revised, yet one should minimize the
                 migration of profiles so as not to disturb the online
                 processing of updates. We provide a re-partitioning
                 technique that achieves all these goals. We keep
                 micro-cluster summaries at each node, collect these
                  summaries at a centralized node, and use a greedy
                 algorithm with novel affinity heuristics to revise the
                 partitioning. We present a demo that showcases our
                  Storm and HBase based implementation of the proposed
                 solution in the context of a customer segmentation
                 application.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2013:RRO,
  author =       "Luying Chen and Stefano Ortona and Giorgio Orsi and
                 Michael Benedikt",
  title =        "{ROSeAnn}: reconciling opinions of semantic
                 annotators",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1238--1241",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Named entity extractors can be used to enrich both
                 text and Web documents with semantic annotations. While
                 originally focused on a few standard entity types, the
                 ecosystem of annotators is becoming increasingly
                 diverse, with recognition capabilities ranging from
                 generic to specialised entity types. Both the overlap
                 and the diversity in annotator vocabularies motivate
                 the need for managing and integrating semantic
                 annotations: allowing users to see the results of
                 multiple annotations and to merge them into a unified
                 solution. We demonstrate ROSEANN, a system for the
                 management of semantic annotations. ROSEANN provides
                 users with a unified view over the opinion of multiple
                 independent annotators both on text and Web documents.
                 It allows users to understand and reconcile conflicts
                 between annotations via ontology-aware aggregation.
                 ROSEANN incorporates both supervised aggregation,
                 appropriate when representative training data is
                 available, and an unsupervised method based on the
                 notion of weighted-repair.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sarwat:2013:RAR,
  author =       "Mohamed Sarwat and James Avery and Mohamed F. Mokbel",
  title =        "{RecDB} in action: recommendation made easy in
                 relational databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1242--1245",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we demonstrate RecDB; a full-fledged
                 database system that provides personalized
                 recommendation to users. We implemented RecDB using an
                 existing open source database system PostgreSQL, and we
                 demonstrate the effectiveness of RecDB using two
                 existing recommendation applications (1) Restaurant
                 Recommendation, (2) Movie Recommendation. To make the
                 demo even more interactive, we showcase a novel
                 application that recommends research papers presented
                 at VLDB 2013 to the conference attendees based on their
                 publication history in DBLP.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Drosou:2013:PTE,
  author =       "Marina Drosou and Evaggelia Pitoura",
  title =        "{POIKILO}: a tool for evaluating the results of
                 diversification models and algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1246--1249",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Search result diversification has attracted
                 considerable attention as a means of improving the
                 quality of results retrieved by user queries. In this
                 demonstration, we present Poikilo, a tool to assist
                 users in locating and evaluating diverse results. We
                 provide implementations of a wide suite of models and
                 algorithms to compute and compare diverse results.
                 Users can tune various diversification parameters,
                 combine diversity with relevance and also see how
                 diverse results change over time in the case of
                 streaming data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amsterdamer:2013:CMA,
  author =       "Yael Amsterdamer and Yael Grossman and Tova Milo and
                 Pierre Senellart",
  title =        "{CrowdMiner}: mining association rules from the
                 crowd",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1250--1253",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This demo presents CrowdMiner, a system enabling the
                 mining of interesting data patterns from the crowd.
                 While traditional data mining techniques have been used
                 extensively for finding patterns in classic databases,
                 they are not always suitable for the crowd, mainly
                 because humans tend to remember only simple trends and
                 summaries rather than exact details. To address this,
                 CrowdMiner employs a novel crowd-mining algorithm,
                 designed specifically for this context. The algorithm
                 iteratively chooses appropriate questions to ask the
                 crowd, while aiming to maximize the knowledge gain at
                 each step. We demonstrate CrowdMiner through a
                 Well-Being portal, constructed interactively by mining
                 the crowd, and in particular the conference
                 participants, for common health related practices and
                 trends.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2013:TTR,
  author =       "Chen Chen and Hongzhi Yin and Junjie Yao and Bin Cui",
  title =        "{TeRec}: a temporal recommender system over tweet
                 stream",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1254--1257",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As social media further integrates into our daily
                 lives, people are increasingly immersed in real-time
                 social streams via services such as Twitter and Weibo.
                 One important observation in these online social
                 platforms is that users' interests and the popularity
                 of topics shift very fast, which poses great challenges
                 on existing recommender systems to provide the right
                 topics at the right time. In this paper, we extend the
                 online ranking technique and propose a temporal
                 recommender system --- TeRec. In TeRec, when posting
                 tweets, users can get recommendations of topics
                 (hashtags) according to their real-time interests, they
                 can also generate fast feedbacks according to the
                 recommendations. TeRec provides the browser-based
                 client interface which enables the users to access the
                 real time topic recommendations, and the server side
                 processes and stores the real-time stream data. The
                 experimental study demonstrates the superiority of
                 TeRec in terms of temporal recommendation accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shkapsky:2013:GQN,
  author =       "Alexander Shkapsky and Kai Zeng and Carlo Zaniolo",
  title =        "Graph queries in a next-generation {Datalog} system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1258--1261",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent theoretical advances have enabled the use of
                 special monotonic aggregates in recursion. These
                 special aggregates make possible the concise expression
                 and efficient implementation of a rich new set of
                 advanced applications. Among these applications, graph
                 queries are particularly important because of their
                 pervasiveness in data intensive application areas. In
                 this demonstration, we present our Deductive
                 Application Language (DeAL) System, the first of a new
                 generation of Deductive Database Systems that support
                 applications that could not be expressed using regular
                 stratification, or could be expressed using
                 XY-stratification (also supported in DeAL) but suffer
                 from inefficient execution. Using example queries, we
                 will (i) show how complex graph queries can be
                 concisely expressed using DeAL and (ii) illustrate the
                 formal semantics and efficient implementation of these
                 powerful new monotonic constructs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hendawi:2013:IFS,
  author =       "Abdeltawab M. Hendawi and Jie Bao and Mohamed F.
                 Mokbel",
  title =        "{iRoad}: a framework for scalable predictive query
                 processing on road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1262--1265",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This demo presents the iRoad framework for evaluating
                 predictive queries on moving objects for road networks.
                 The main promise of the iRoad system is to support a
                 variety of common predictive queries including
                 predictive point query, predictive range query,
                 predictive KNN query, and predictive aggregate query.
                 The iRoad framework is equipped with a novel data
                 structure, named reachability tree, employed to
                 determine the reachable nodes for a moving object
                  within a specified future time $T$. In fact, the
                 reachability tree prunes the space around each object
                 in order to significantly reduce the computation time.
                 So, iRoad is able to scale up to handle real road
                 networks with millions of nodes, and it can process
                 heavy workloads on large numbers of moving objects.
                 During the demo, audience will be able to interact with
                 iRoad through a well designed Graphical User Interface
                 to issue different types of predictive queries on a
                 real road network, to obtain the predictive heatmap of
                 the area of interest, to follow the creation and the
                 dynamic update of the reachability tree around a
                 specific moving object, and finally to examine the
                 system efficiency and scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% SkySuite: skyline-join operators (SSJ, LSJ) for static and stream data.
@Article{Nagendra:2013:SFS,
  author =       "Mithila Nagendra and K. Sel{\c{c}}uk Candan",
  title =        "{SkySuite}: a framework of skyline-join operators for
                 static and stream environments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1266--1269",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Efficient processing of skyline queries has been an
                 area of growing interest over both static and stream
                 environments. Most existing static and streaming
                 techniques assume that the skyline query is applied to
                 a single data source. Unfortunately, this is not true
                 in many applications in which, due to the complexity of
                 the schema, the skyline query may involve attributes
                 belonging to multiple data sources. Recently, in the
                 context of static environments, various hybrid
                 skyline-join algorithms have been proposed. However,
                 these algorithms suffer from several drawbacks: they
                 often need to scan the data sources exhaustively in
                 order to obtain the set of skyline-join results;
                 moreover, the pruning techniques employed to eliminate
                 the tuples are largely based on expensive pairwise
                 tuple-to-tuple comparisons. On the other hand, most
                 existing streaming methods focus on single stream
                 skyline analysis, thus rendering these techniques
                 unsuitable for applications that require a real-time
                 ``join'' operation to be carried out before the skyline
                 query can be answered. Based on these observations, we
                 introduce and propose to demonstrate SkySuite: a
                 framework of skyline-join operators that can be
                 leveraged to efficiently process skyline-join queries
                 over both static and stream environments. Among others,
                 SkySuite includes (1) a novel Skyline-Sensitive Join
                 (SSJ) operator that effectively processes skyline-join
                 queries in static environments, and (2) a Layered
                 Skyline-window-Join (LSJ) operator that incrementally
                 maintains skyline-join results over stream
                 environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Medusa: programming framework for parallel graph processing on GPUs.
@Article{Zhong:2013:PGP,
  author =       "Jianlong Zhong and Bingsheng He",
  title =        "Parallel graph processing on graphics processors made
                 easy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1270--1273",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper demonstrates Medusa, a programming
                 framework for parallel graph processing on graphics
                 processors (GPUs). Medusa enables developers to
                 leverage the massive parallelism and other hardware
                 features of GPUs by writing sequential C/C++ code for a
                 small set of APIs. This simplifies the implementation
                 of parallel graph processing on the GPU. The runtime
                 system of Medusa automatically executes the
                 user-defined APIs in parallel on the GPU, with a series
                 of graph-centric optimizations based on the
                 architecture features of GPUs. We will demonstrate the
                 steps of developing GPU-based graph processing
                 algorithms with Medusa, and the superior performance of
                 Medusa with both real-world and synthetic datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Mosquito: adaptive indexing created as a byproduct of Hadoop/HDFS uploads.
@Article{Richter:2013:MAO,
  author =       "Stefan Richter and Jens Dittrich and Stefan Schuh and
                 Tobias Frey",
  title =        "{Mosquito}: another one bites the data upload stream",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1274--1277",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Mosquito is a lightweight and adaptive physical design
                 framework for Hadoop. Mosquito connects to existing
                 data pipelines in Hadoop MapReduce and/or HDFS,
                 observes the data, and creates better physical designs,
                 i.e. indexes, as a byproduct. Our approach is minimally
                 invasive, yet it allows users and developers to easily
                 improve the runtime of Hadoop. We present three
                 important use cases: first, how to create indexes as a
                 byproduct of data uploads into HDFS; second, how to
                 create indexes as a byproduct of map tasks; and third,
                 how to execute map tasks as a byproduct of HDFS data
                 uploads. These use cases may even be combined.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NoFTL: native Flash access with FTL functionality moved into the DBMS
%%% (evaluated in Shore-MT).
@Article{Hardock:2013:NDS,
  author =       "Sergej Hardock and Ilia Petrov and Robert Gottstein
                 and Alejandro Buchmann",
  title =        "{NoFTL}: database systems on {FTL}-less flash
                 storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1278--1281",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The database architecture and workhorse algorithms
                 have been designed to compensate for hard disk
                 properties. The I/O characteristics of Flash memories
                 have significant impact on database systems and many
                 algorithms and approaches taking advantage of those
                 have been proposed recently. Nonetheless on system
                 level Flash storage devices are still treated as HDD
                 compatible block devices, black boxes and fast HDD
                 replacements. This backwards compatibility (both
                 software and hardware) masks the native behaviour,
                 incurs significant complexity and decreases I/O
                 performance, making it non-robust and unpredictable.
                 Database systems have a long tradition of operating
                 directly on RAW storage natively, utilising the
                 physical characteristics of storage media to improve
                 performance. In this paper we demonstrate an approach
                 called NoFTL that goes a step further. We show that
                 allowing for native Flash access and integrating parts
                 of the FTL functionality into the database system
                 yields significant performance increase and
                 simplification of the I/O stack. We created a real-time
                 data-driven Flash emulator and integrated it
                 accordingly into Shore-MT. We demonstrate a performance
                 improvement of up to $ 3.7 \times $ compared to
                 Shore-MT on RAW block-device Flash storage under
                 various TPC workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% SmartMonitor: structural health monitoring with accelerometer-equipped
%%% smart devices.
%%% NOTE(review): fixed author-name typo ``Dimitirios'' -> ``Dimitrios''
%%% (Dimitrios Gunopulos).
@Article{Kotsakos:2013:SUS,
  author =       "Dimitrios Kotsakos and Panos Sakkos and Vana
                 Kalogeraki and Dimitrios Gunopulos",
  title =        "{SmartMonitor}: using smart devices to perform
                 structural health monitoring",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1282--1285",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration, we are presenting SmartMonitor,
                 a distributed Structural Health Monitoring (SHM) system
                 consisting of smart devices. Over the last few years,
                 the vast majority of smart devices is equipped with
                 accelerometers that can be utilized towards building
                 SHM systems with hundreds of nodes. We describe a
                 scalable, fault-tolerant communication protocol, that
                 performs best-effort time synchronization of the nodes
                 and is used to implement a decentralized version of the
                 popular peak-picking SHM method. The implemented
                 interactive system can be easily installed in any
                 accelerometer-equipped Android device and the user has
                 a number of options for configuring the system or
                 analyzing the collected data and computed outcomes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Lazy ETL: on-demand extract--transform--load integrated into query
%%% processing of a scientific (seismic) data warehouse.
%%% NOTE(review): corrected Turkish accents in the first author's name,
%%% Ya{\u{g}}{\i}z Karg{\i}n (breve g, plain dotless i); the entry had
%%% {\'\i} (acute-accented dotless i), which renders ``Kargín''.
@Article{Kargin:2013:LEA,
  author =       "Ya{\u{g}}{\i}z Karg{\i}n and Milena Ivanova and Ying
                 Zhang and Stefan Manegold and Martin Kersten",
  title =        "{Lazy ETL} in action: {ETL} technology dates
                 scientific data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1286--1289",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Both scientific data and business data have analytical
                 needs. Analysis takes place after a scientific data
                 warehouse is eagerly filled with all data from external
                 data sources (repositories). This is similar to the
                 initial loading stage of Extract, Transform, and Load
                 (ETL) processes that drive business intelligence. ETL
                 can also help scientific data analysis. However, the
                 initial loading is a time and resource consuming
                 operation. It might not be entirely necessary, e.g. if
                 the user is interested in only a subset of the data. We
                 propose to demonstrate Lazy ETL, a technique to lower
                 costs for initial loading. With it, ETL is integrated
                 into the query processing of the scientific data
                 warehouse. For a query, only the required data items
                 are extracted, transformed, and loaded transparently
                 on-the-fly. The demo is built around concrete
                 implementations of Lazy ETL for seismic data analysis.
                 The seismic data warehouse is ready for query
                 processing, without waiting for long initial loading.
                 The audience fires analytical queries to observe the
                 internal mechanisms and modifications that realize each
                 of the steps; lazy extraction, transformation, and
                 loading.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% EagleTree: SSD simulation framework for exploring the design space of
%%% SSD-based algorithms.
@Article{Dayan:2013:EED,
  author =       "Niv Dayan and Martin Kj{\ae}r Svendsen and Matias
                 Bj{\o}rling and Philippe Bonnet and Luc Bouganim",
  title =        "{EagleTree}: exploring the design space of {SSD}-based
                 algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1290--1293",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Solid State Drives (SSDs) are a moving target for
                 system designers: they are black boxes, their internals
                 are undocumented, and their performance characteristics
                 vary across models. There is no appropriate analytical
                 model and experimenting with commercial SSDs is
                 cumbersome, as it requires a careful experimental
                 methodology to ensure repeatability. Worse, performance
                 results obtained on a given SSD cannot be generalized.
                 Overall, it is impossible to explore how a given
                 algorithm, say a hash join or LSM-tree insertions,
                 leverages the intrinsic parallelism of a modern SSD, or
                 how a slight change in the internals of an SSD would
                 impact its overall performance. In this paper, we
                 propose a new SSD simulation framework, named
                 EagleTree, which addresses these problems, and enables
                 a principled study of SSD-Based algorithms. The
                 demonstration scenario illustrates the design space for
                 algorithms based on an SSD-based IO stack, and shows
                 how researchers and practitioners can use EagleTree to
                 perform tractable explorations of this complex design
                 space.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% EnviroMeter: model-based continuous queries over community-sensed
%%% environmental pollution data.
@Article{Sathe:2013:EPQ,
  author =       "Saket Sathe and Arthur Oviedo and Dipanjan Chakraborty
                 and Karl Aberer",
  title =        "{EnviroMeter}: a platform for querying
                 community-sensed data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1294--1297",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Efficiently querying data collected from Large-area
                 Community driven Sensor Networks (LCSNs) is a new and
                 challenging problem. In our previous works, we proposed
                 adaptive techniques for learning models (e.g.,
                 statistical, nonparametric, etc.) from such data,
                 considering the fact that LCSN data is typically
                 geo-temporally skewed. In this paper, we present a
                 demonstration of EnviroMeter. EnviroMeter uses our
                 adaptive model creation techniques for processing
                 continuous queries on community-sensed environmental
                 pollution data. Subsequently, it efficiently pushes
                 current pollution updates to GPS-enabled smartphones
                 (through its Android application) or displays it via a
                 web-interface. We experimentally demonstrate that our
                 model-based query processing approach is orders of
                 magnitude efficient than processing the queries over
                 indexed raw data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Scolopax: a ``search engine for hypotheses'' for exploratory analysis
%%% of scientific data (demo on Cornell Lab of Ornithology bird sightings).
@Article{Okcan:2013:SEA,
  author =       "Alper Okcan and Mirek Riedewald and Biswanath Panda
                 and Daniel Fink",
  title =        "{Scolopax}: exploratory analysis of scientific data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1298--1301",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The formulation of hypotheses based on patterns found
                 in data is an essential component of scientific
                 discovery. As larger and richer data sets become
                 available, new scalable and user-friendly tools for
                 scientific discovery through data analysis are needed.
                 We demonstrate Scolopax, which explores the idea of a
                 search engine for hypotheses. It has an intuitive user
                 interface that supports sophisticated queries. Scolopax
                 can explore a huge space of possible hypotheses,
                 returning a ranked list of those that best match the
                 user preferences. To scale to large and complex data
                 sets, Scolopax relies on parallel data management and
                 mining techniques. These include model training,
                 efficient model summary generation, and novel parallel
                 join techniques that together with traditional
                 approaches such as clustering manipulate massive
                 model-summary collections to find the most interesting
                 hypotheses. This demonstration of Scolopax uses a real
                 observational data set, provided by the Cornell Lab of
                 Ornithology. It contains more than 3.3 million bird
                 sightings reported by citizen scientists and has almost
                 2500 attributes. Conference attendees have the
                 opportunity to make novel discoveries in this data set,
                 ranging from identifying variables that strongly affect
                 bird populations in specific regions to detecting more
                 sophisticated patterns such as habitat competition and
                 migration.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PROPOLIS: provenance-based provisioned analysis of data-centric
%%% processes under hypothetical modification scenarios.
@Article{Deutch:2013:PPA,
  author =       "Daniel Deutch and Yuval Moskovitch and Val Tannen",
  title =        "{PROPOLIS}: provisioned analysis of data-centric
                 processes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1302--1305",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We consider in this demonstration the (static)
                 analysis of data-centric process-based applications,
                 namely applications that depend on an underlying
                 database and whose control is guided by a finite state
                 transition system. We observe that analysts of such
                 applications often want to do more than analyze a
                 specific instance of the application's process control
                 and database. In particular they want to interactively
                 test and explore the effect on analysis results of
                 different hypothetical modifications applied to the
                 application's transition system and to the underlying
                 database. To that end, we propose a demonstration of
                 PROPOLIS, a system for PROvisioned PrOcess anaLysIS,
                 namely analysis of data-centric processes under
                 hypothetical modification scenarios. Our solution is
                 based on the notion of a provisioned expression (which
                 in turn is based on the notion of data provenance),
                 namely an expression that captures, in a compact way,
                 the analysis result with respect to all possible
                 combinations of scenarios, and allows for their
                 exploration at interactive speed. We will demonstrate
                 PROPOLIS in the context of an online shopping
                 application, letting participants play the role of
                 analysts.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Columbus: feature-selection operations for enterprise analytics,
%%% prototyped as a library in the Oracle R Enterprise environment.
@Article{Konda:2013:FSE,
  author =       "Pradap Konda and Arun Kumar and Christopher R{\'e} and
                 Vaishnavi Sashikanth",
  title =        "Feature selection in enterprise analytics: a
                 demonstration using an {R}-based data analytics
                 system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1306--1309",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Enterprise applications are analyzing ever larger
                 amounts of data using advanced analytics techniques.
                 Recent systems from Oracle, IBM, and SAP integrate R
                 with a data processing system to support richer
                 advanced analytics on large data. A key step in
                 advanced analytics applications is feature selection,
                 which is often an iterative process that involves
                 statistical algorithms and data manipulations. From our
                 conversations with data scientists and analysts at
                 enterprise settings, we observe three key aspects about
                 feature selection. First, feature selection is
                 performed by many types of users, not just data
                 scientists. Second, high performance is critical to
                 perform feature selection processes on large data.
                 Third, the provenance of the results and steps in
                 feature selection processes needs to be tracked for
                 purposes of transparency and auditability. Based on our
                 discussions with data scientists and the literature on
                 feature selection practice, we organize a set of
                 operations for feature selection into the Columbus
                 framework. We prototype Columbus as a library usable in
                 the Oracle R Enterprise environment. In this
                 demonstration, we use Columbus to showcase how we can
                 support various types of users of feature selection in
                 one system. We then show how we optimize performance
                 and manage the provenance of feature selection
                 processes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% FQP: online-reconfigurable FPGA-based event stream query processor
%%% (select/project/join at line rate).
@Article{Najafi:2013:FQP,
  author =       "Mohammadreza Najafi and Mohammad Sadoghi and Hans-Arno
                 Jacobsen",
  title =        "Flexible query processor on {FPGAs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1310--1313",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this work, we demonstrate Flexible Query Processor
                 (FQP), an online reconfigurable event stream query
                 processor. FQP is an FPGA-based query processor that
                 supports select, project and join queries over event
                 streams at line rate. While processing incoming events,
                 FQP can accept new query expressions, a key
                 distinguishing characteristic from related approaches
                 employing FPGAs for acceleration. Our solution aims to
                 address performance limitations experienced with
                 general purpose processors needing to operate at line
                 rate and lack of on the fly reconfigurability with
                 custom designed hardware solutions on FPGAs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% MASTRO STUDIO: tool suite for managing ontology-based data access
%%% (OBDA) applications.
@Article{Civili:2013:MSM,
  author =       "Cristina Civili and Marco Console and Giuseppe {De
                 Giacomo} and Domenico Lembo and Maurizio Lenzerini and
                 Lorenzo Lepore and Riccardo Mancini and Antonella Poggi
                 and Riccardo Rosati and Marco Ruzzi and Valerio
                 Santarelli and Domenico Fabio Savo",
  title =        "{MASTRO STUDIO}: managing ontology-based data access
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1314--1317",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Ontology-based data access (OBDA) is a novel paradigm
                 for accessing large data repositories through an
                 ontology, that is a formal description of a domain of
                 interest. Supporting the management of OBDA
                 applications poses new challenges, as it requires to
                 provide effective tools for (i) allowing both expert
                 and non-expert users to analyze the OBDA specification,
                 (ii) collaboratively documenting the ontology, (iii)
                 exploiting OBDA services, such as query answering and
                 automated reasoning over ontologies, e.g., to support
                 data quality check, and (iv) tuning the OBDA
                 application towards optimized performances. To fulfill
                 these challenges, we have built a novel system, called
                 MASTRO STUDIO, based on a tool for automated reasoning
                 over ontologies, enhanced with a suite of tools and
                 optimization facilities for managing OBDA applications.
                 To show the effectiveness of MASTRO STUDIO, we
                 demonstrate its usage in one OBDA application developed
                 in collaboration with the Italian Ministry of Economy
                 and Finance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PLASMA-HD: incremental interactive probing of the intrinsic structure
%%% of high-dimensional data (LSH + knowledge caching + graph visualization).
@Article{Fuhry:2013:PHP,
  author =       "David Fuhry and Yang Zhang and Venu Satuluri and Arnab
                 Nandi and Srinivasan Parthasarathy",
  title =        "{PLASMA-HD}: probing the lattice structure and makeup
                 of high-dimensional data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1318--1321",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Rapidly making sense of, analyzing, and extracting
                 useful information from large and complex data is a
                 grand challenge. A user tasked with meeting this
                 challenge is often befuddled with questions on where
                 and how to begin to understand the relevant
                 characteristics of such data. Real-world problem
                 scenarios often involve scalability limitations and
                 time constraints. In this paper we present an
                 incremental interactive data analysis system as a step
                 to address this challenge. This system builds on recent
                 progress in the fields of interactive data exploration,
                 locality sensitive hashing, knowledge caching, and
                 graph visualization. Using visual clues based on rapid
                 incremental estimates, a user is provided a multi-level
                 capability to probe and interrogate the intrinsic
                 structure of data. Throughout the interactive process,
                 the output of previous probes can be used to construct
                 increasingly tight coherence estimates across the
                 parameter space, providing strong hints to the user
                 about promising analysis steps to perform next. We
                 present examples, interactive scenarios, and
                 experimental results on several synthetic and
                 real-world datasets which show the effectiveness and
                 efficiency of our approach. The implications of this
                 work are quite broad and can impact fields ranging from
                 top-$k$ algorithms to data clustering and from manifold
                 learning to similarity search.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Moyers:2013:DIP,
  author =       "Matthew Moyers and Emad Soroush and Spencer C. Wallace
                 and Simon Krughoff and Jake Vanderplas and Magdalena
                 Balazinska and Andrew Connolly",
  title =        "A demonstration of iterative parallel array processing
                 in support of telescope image analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1322--1325",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration, we present AscotDB, a new tool
                 for the analysis of telescope image data. AscotDB
                 results from the integration of ASCOT, a Web-based tool
                 for the collaborative analysis of telescope images and
                 their metadata, and SciDB, a parallel array processing
                 engine. We demonstrate the novel data exploration
                 supported by this integrated tool on a 1 TB dataset
                 comprising scientifically accurate, simulated telescope
                 images. We also demonstrate novel iterative-processing
                 features that we added to SciDB in order to support
                 this use-case.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abdelhaq:2013:EOL,
  author =       "Hamed Abdelhaq and Christian Sengstock and Michael
                 Gertz",
  title =        "{EvenTweet}: online localized event detection from
                 {Twitter}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1326--1329",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Microblogging services such as Twitter, Facebook, and
                 Foursquare have become major sources for information
                 about real-world events. Most approaches that aim at
                 extracting event information from such sources
                 typically use the temporal context of messages.
                 However, exploiting the location information of
                 georeferenced messages, too, is important to detect
                 localized events, such as public events or emergency
                 situations. Users posting messages that are close to
                 the location of an event serve as human sensors to
                 describe an event. In this demonstration, we present a
                 novel framework to detect localized events in real-time
                 from a Twitter stream and to track the evolution of
                 such events over time. For this, spatio-temporal
                 characteristics of keywords are continuously extracted
                 to identify meaningful candidates for event
                 descriptions. Then, localized event information is
                 extracted by clustering keywords according to their
                 spatial similarity. To determine the most important
                 events in a (recent) time frame, we introduce a scoring
                 scheme for events. We demonstrate the functionality of
                 our system, called EvenTweet, using a stream of tweets
                 from Europe during the 2012 UEFA European Football
                 Championship.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mousavi:2013:ITM,
  author =       "Hamid Mousavi and Shi Gao and Carlo Zaniolo",
  title =        "{IBminer}: a text mining tool for constructing and
                 populating {InfoBox} databases and knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1330--1333",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Knowledge bases and structured summaries are playing a
                 crucial role in many applications, such as text
                 summarization, question answering, essay grading, and
                 semantic search. Although, many systems (e.g., DBpedia
                 and YaGo2) provide massive knowledge bases of such
                 summaries, they all suffer from incompleteness,
                 inconsistencies, and inaccuracies. These problems can
                 be addressed and much improved by combining and
                 integrating different knowledge bases, but their very
                 large sizes and their reliance on different
                 terminologies and ontologies make the task very
                 difficult. In this demo, we will demonstrate a system
                 that is achieving good success on this task by: (i)
                 employing available interlinks in the current knowledge
                 bases (e.g. external link and redirect links in
                 DBpedia) to combine information on individual entities,
                 and (ii) using widely available text corpora (e.g.
                 Wikipedia) and our IBminer text-mining system, to
                 generate and verify structured information, and
                 reconcile terminologies across different knowledge
                 bases. We will also demonstrate two tools designed to
                 support the integration process in close collaboration
                 with IBminer. The first is the InfoBox Knowledge-Base
                 Browser (IBKB) which provides structured summaries and
                 their provenance, and the second is the InfoBox Editor
                 (IBE), which is designed to suggest relevant attributes
                 for a user-specified subject, whereby the user can
                 easily improve the knowledge base without requiring any
                 knowledge about the internal terminology of individual
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Farnan:2013:PPA,
  author =       "Nicholas L. Farnan and Adam J. Lee and Panos K.
                 Chrysanthis and Ting Yu",
  title =        "{PAQO}: a preference-aware query optimizer for
                 {PostgreSQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1334--1337",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Although the declarative nature of SQL provides great
                 utility to database users, its use in distributed
                 database management systems can leave users unaware of
                 which servers in the system are evaluating portions of
                 their queries. By allowing users to merely say what
                 data they are interested in accessing without providing
                 guidance regarding how to retrieve it, query optimizers
                 can generate plans with unintended consequences to the
                 user (e.g., violating user privacy by revealing
                 sensitive portions of a user's query to untrusted
                 servers, or impacting result freshness by pulling data
                 from stale data stores). To address these types of
                 issues, we have created a framework that empowers users
                 with the ability to specify constraints on the kinds of
                 plans that can be produced by the optimizer to evaluate
                 their queries. Such constraints are specified through
                 an extended version of SQL that we have developed which
                 we call PASQL. With this proposal, we aim to
                 demonstrate PAQO, a version of PostgreSQL's query
                 optimizer that we have modified to produce plans that
                 respect constraints specified through PASQL while
                 optimizing user-specified SQL queries in terms of
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bothe:2013:EPS,
  author =       "Suvarna Bothe and Panagiotis Karras and Akrivi
                 Vlachou",
  title =        "{eSkyline}: processing skyline queries over encrypted
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1338--1341",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The advent of cloud computing redefines the
                 traditional query processing paradigm. Whereas
                 computational overhead and memory constraints become
                 less prohibitive, data privacy, security, and
                 confidentiality concerns become top priorities. In
                 particular, as data owners outsource the management of
                 their data to service providers, query processing over
                 such data has more resources to tap into, yet the data
                 oftentimes has to be encrypted so as to prevent
                 unauthorized access. The challenge that arises in such
                 a setting is to devise an encryption scheme that still
                 allows for query results to be efficiently computed
                 using the encrypted data values. An important type of
                 query that raises unconventional requirements in terms
                 of the operator that has to be evaluated is the skyline
                 query, which returns a set of objects in a dataset
                 whose values are not dominated by any other object
                 therein. In this demonstration, we present eSkyline, a
                 prototype system and query interface that enables the
                 processing of skyline queries over encrypted data, even
                 without preserving the order on each attribute as
                 order-preserving encryption would do. Our system
                 comprises of an encryption scheme that facilitates the
                 evaluation of domination relationships, hence allows
                 for state-of-the-art skyline processing algorithms to
                 be used. The actual data values are reconstructed only
                 at the client side, where the encryption key is known.
                 Our demo visualizes the details of the encryption
                 scheme, allows a user to interact with a server, and
                 showcases the efficiency of computing skyline queries
                 and decrypting the results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2013:GMD,
  author =       "Lilong Jiang and Michael Mandel and Arnab Nandi",
  title =        "{GestureQuery}: a multitouch database query
                 interface",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1342--1345",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multitouch interfaces allow users to directly and
                 interactively manipulate data. We propose bringing such
                 interactive manipulation to the task of querying SQL
                 databases. This paper describes an initial
                 implementation of such an interface for multitouch
                 tablet devices called GestureQuery that translates
                 multitouch gestures into database queries. It provides
                 database users with immediate constructive feedback on
                 their queries, allowing rapid iteration and refinement
                 of those queries. Based on preliminary user studies,
                 GestureQuery is easier to use, and lets users
                 construct target queries quicker than console-based SQL
                 and visual query builders while maintaining interactive
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2013:MLP,
  author =       "Di Yang and Kaiyu Zhao and Maryam Hasan and Hanyuan Lu
                 and Elke Rundensteiner and Matthew Ward",
  title =        "Mining and linking patterns across live data streams
                 and stream archives",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1346--1349",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We will demonstrate the visual analytics system
                 Vistream$^T$, that supports interactive mining of
                 complex patterns within and across live data streams
                 and stream pattern archives. Our system is equipped
                 with both computational pattern mining and
                 visualization techniques, which allow it to not only
                 efficiently discover and manage patterns but also
                 effectively convey the mining results to human analysts
                 through visual displays. In our demonstration, we will
                 illustrate that with Vistream$^T$, analysts can
                 easily submit, monitor and interact with a broad range
                 of query types for pattern mining. This includes novel
                 strategies for extracting complex patterns from streams
                 in real time, summarizing neighbour-based patterns
                 using multi-resolution compression strategies,
                 selectively pushing patterns into the stream archive,
                 validating the popularity or rarity of stream patterns
                 by stream archive matching, and pattern evolution
                 tracking to link patterns across time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Samet:2013:PMQ,
  author =       "Hanan Samet and Marco D. Adelfio and Brendan C. Fruin
                 and Michael D. Lieberman and Jagan Sankaranarayanan",
  title =        "{PhotoStand}: a map query interface for a database of
                 news photos",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1350--1353",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "PhotoStand enables the use of a map query interface to
                 retrieve news photos associated with news articles that
                 are in turn associated with the principal locations
                 that they mention collected as a result of monitoring
                 the output of over 10,000 RSS news feeds, made
                 available within minutes of publication, and stored in
                 a PostgreSQL database. The news photos are ranked
                 according to their relevance to the clusters of news
                 articles associated with locations at which they are
                 displayed. This work differs from traditional work in
                 this field as the associated locations and topics (by
                 virtue of the cluster with which the articles
                 containing the news photos are associated) are
                 generated automatically without any human intervention
                 such as tagging, and that photos are retrieved by
                 location instead of just by keyword as is the case for
                 many existing systems. In addition, the clusters
                 provide a filtering step for detecting near-duplicate
                 news photos.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kumar:2013:HSH,
  author =       "K. Ashwin Kumar and Jonathan Gluck and Amol Deshpande
                 and Jimmy Lin",
  title =        "{Hone}: ``Scaling down'' {Hadoop} on shared-memory
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1354--1357",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The underlying assumption behind Hadoop and, more
                 generally, the need for distributed processing is that
                 the data to be analyzed cannot be held in memory on a
                 single machine. Today, this assumption needs to be
                 re-evaluated. Although petabyte-scale data-stores are
                 increasingly common, it is unclear whether ``typical''
                 analytics tasks require more than a single high-end
                 server. Additionally, we are seeing increased
                 sophistication in analytics, e.g., machine learning,
                 which generally operates over smaller and more refined
                 datasets. To address these trends, we propose ``scaling
                 down'' Hadoop to run on shared-memory machines. This
                 paper presents a prototype runtime called Hone,
                 intended to be both API and binary compatible with
                 standard (distributed) Hadoop. That is, Hone can take
                 an existing Hadoop jar and efficiently execute it,
                 without modification, on a multi-core shared memory
                 machine. This allows us to take existing Hadoop
                 algorithms and find the most suitable run-time
                 environment for execution on datasets of varying sizes.
                 Our experiments show that Hone can be an order of
                 magnitude faster than Hadoop pseudo-distributed mode
                 (PDM); on dataset sizes that fit into memory, Hone can
                 outperform a fully-distributed 15-node Hadoop cluster
                 in some cases as well.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Antenucci:2013:RGN,
  author =       "Dolan Antenucci and Erdong Li and Shaobo Liu and
                 Bochun Zhang and Michael J. Cafarella and Christopher
                 R{\'e}",
  title =        "{Ringtail}: a generalized nowcasting system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1358--1361",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Social media nowcasting--using online user activity to
                 describe real-world phenomena--is an active area of
                 research to supplement more traditional and costly data
                 collection methods such as phone surveys. Given the
                 potential impact of such research, we would expect
                 general-purpose nowcasting systems to quickly become a
                 standard tool among noncomputer scientists, yet it has
                 largely remained a research topic. We believe a major
                 obstacle to widespread adoption is the nowcasting
                 feature selection problem. Typical nowcasting systems
                 require the user to choose a handful of social media
                 objects from a pool of billions of potential
                 candidates, which can be a time-consuming and
                 error-prone process. We have built RINGTAIL, a
                 nowcasting system that helps the user by automatically
                 suggesting high-quality signals. We demonstrate that
                 RINGTAIL can make nowcasting easier by suggesting
                 relevant features for a range of topics. The user
                 provides just a short topic query (e.g., unemployment)
                 and a small conventional dataset in order for RINGTAIL
                 to quickly return a usable predictive nowcasting
                 model.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xie:2013:IIP,
  author =       "Min Xie and Laks V. S. Lakshmanan and Peter T. Wood",
  title =        "{IPS}: an interactive package configuration system for
                 trip planning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1362--1365",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "When planning a trip, one essential task is to find a
                 set of Places-of-Interest (POIs) which can be visited
                 during the trip. Using existing travel guides or
                 websites such as Lonely Planet and TripAdvisor, the
                 user has to either manually work out a desirable set of
                 POIs or take pre-configured travel packages; the former
                 can be time consuming while the latter lacks
                 flexibility. In this demonstration, we propose an
                 Interactive Package configuration System (IPS), which
                 visualizes different candidate packages on a map, and
                 enables users to configure a travel package through
                 simple interactions, i.e., comparing packages and
                 fixing/removing POIs from a package. Compared with
                 existing trip planning systems, we believe IPS strikes
                 the right balance between flexibility and manual
                 effort.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2013:RDS,
  author =       "Jingbo Zhou and Anthony K. H. Tung and Wei Wu and Wee
                 Siong Ng",
  title =        "{R2-D2}: a system to support probabilistic path
                 prediction in dynamic environments via ``Semi-lazy''
                 learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1366--1369",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Path prediction is presently an important area of
                 research with a wide range of applications. However,
                 most of the existing path prediction solutions are
                 based on eager learning methods which commit to a model
                 or a set of patterns extracted from historical
                 trajectories. Such methods do not perform very well in
                 dynamic environments where the objects' trajectories
                 are affected by many irregular factors which are not
                 captured by pre-defined models or patterns. In this
                 demonstration, we present the ``R2-D2'' system that
                 supports probabilistic path prediction in dynamic
                 environments. The core of our system is a ``semi-lazy''
                 learning approach to probabilistic path prediction
                 which builds a prediction model on the fly using
                 historical trajectories that are selected dynamically
                 based on the trajectories of target objects. Our
                 ``R2-D2'' system has a visual interface that shows how
                 our path prediction algorithm works on several
                 real-world datasets. It also allows us to experiment
                 with various parameter settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chun:2013:RRE,
  author =       "Byung-Gon Chun and Tyson Condie and Carlo Curino and
                 Chris Douglas and Sergiy Matusevych and Brandon Myers
                 and Shravan Narayanamurthy and Raghu Ramakrishnan and
                 Sriram Rao and Josh Rosen and Russell Sears and Markus
                 Weimer",
  title =        "{REEF}: retainable evaluator execution framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1370--1373",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demo proposal, we describe REEF, a framework
                 that makes it easy to implement scalable,
                 fault-tolerant runtime environments for a range of
                 computational models. We will demonstrate diverse
                 workloads, including extract-transform-load MapReduce
                 jobs, iterative machine learning algorithms, and ad-hoc
                 declarative query processing. At its core, REEF builds
                 atop YARN (Apache Hadoop 2's resource manager) to
                 provide retainable hardware resources with lifetimes
                 that are decoupled from those of computational tasks.
                 This allows us to build persistent (cross-job) caches
                 and cluster-wide services, but, more importantly,
                 supports high-performance iterative graph processing
                 and machine learning algorithms. Unlike existing
                 systems, REEF aims for composability of jobs across
                 computational models, providing significant performance
                 and usability gains, even with legacy code. REEF
                 includes a library of interoperable data management
                 primitives optimized for communication and data
                 movement (which are distinct from storage locality).
                 The library also allows REEF applications to access
                 external services, such as user-facing relational
                 databases. We were careful to decouple lower levels of
                 REEF from the data models and semantics of systems
                 built atop it. The result was two new standalone
                 systems: Tang, a configuration manager and dependency
                 injector, and Wake, a state-of-the-art event-driven
                 programming and data movement framework. Both are
                 language independent, allowing REEF to bridge the JVM
                 and .NET.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2013:OTP,
  author =       "Shuhao Zhang and Jiong He and Bingsheng He and Mian
                 Lu",
  title =        "{OmniDB}: towards portable and efficient query
                 processing on parallel {CPU\slash GPU} architectures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1374--1377",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Driven by the rapid hardware development of parallel
                 CPU/GPU architectures, we have witnessed emerging
                 relational query processing techniques and
                 implementations on those parallel architectures.
                 However, most of those implementations are not portable
                 across different architectures, because they are
                 usually developed from scratch and target at a specific
                 architecture. This paper proposes a kernel-adapter
                 based design (OmniDB), a portable yet efficient query
                 processor on parallel CPU/GPU architectures. OmniDB
                 attempts to develop an extensible query processing
                 kernel (qKernel) based on an abstract model for
                 parallel architectures, and to leverage an
                 architecture-specific layer (adapter) to make qKernel
                 be aware of the target architecture. The goal of OmniDB
                 is to maximize the common functionality in qKernel so
                 that the development and maintenance efforts for
                 adapters are minimized across different architectures.
                 In this demo, we demonstrate our initial efforts in
                 implementing OmniDB, and present the preliminary
                 results on the portability and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Savkovic:2013:CAI,
  author =       "Ognjen Savkovi{\'c} and Paramita Mirza and Alex Tomasi
                 and Werner Nutt",
  title =        "Complete approximations of incomplete queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1378--1381",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a system that computes for a query that may
                 be incomplete, complete approximations from above and
                 from below. We assume a setting where queries are posed
                 over a partially complete database, that is, a database
                 that is generally incomplete, but is known to contain
                 complete information about specific aspects of its
                 application domain. Which parts are complete, is
                 described by a set of so-called table-completeness
                 statements. Previous work led to a theoretical
                 framework and an implementation that allowed one to
                 determine whether in such a scenario a given
                 conjunctive query is guaranteed to return a complete
                 set of answers or not. With the present demonstrator we
                 show how to reformulate the original query in such a
                 way that answers are guaranteed to be complete. If
                 there exists a more general complete query, there is a
                 unique most specific one, which we find. If there
                 exists a more specific complete query, there may even
                 be infinitely many. In this case, we find the least
                 specific specializations whose size is bounded by a
                 threshold provided by the user. Generalizations are
                 computed by a fixpoint iteration, employing an answer
                 set programming engine. Specializations are found
                 leveraging unification from logic programming.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koutrika:2013:UAU,
  author =       "Georgia Koutrika and Qian Lin and Jerry Liu",
  title =        "User analytics with {UbeOne}: insights into web
                 printing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1382--1385",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As web and mobile applications become more sensitive
                 to the user context, there is a shift from purely
                 off-line processing of user actions (log analysis) to
                 real-time user analytics that can generate information
                 about the user context to be instantly leveraged by the
                 application. UbeOne is a system that enables both
                 real-time and aggregate analytics from user data. The
                 system is designed as a set of lightweight, composeable
                 mechanisms that can progressively and collectively
                 analyze a user action, such as pinning, saving or
                 printing a web page. We will demonstrate the system
                 capabilities on analyzing a live feed of URLs printed
                 through a proprietary, web browser plug-in. This is in
                 fact the first analysis of web printing activity. We
                 will also give a taste of how the system can enable
                 instant recommendations based on the user context.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Santos:2013:DDS,
  author =       "Ivo Santos and Marcel Tilly and Badrish Chandramouli
                 and Jonathan Goldstein",
  title =        "{DiAl}: distributed streaming analytics anywhere,
                 anytime",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1386--1389",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Connected devices are expected to grow to 50 billion
                 in 2020. Through our industrial partners and their use
                 cases, we validated the importance of inflight data
                 processing to produce results with low latency, in
                 particular local and global data analytics
                 capabilities. In order to cope with the scalability
                 challenges posed by distributed streaming analytics
                 scenarios, we propose two new technologies: (1)
                 JStreams, a low footprint and efficient JavaScript
                 complex event processing engine supporting local
                 analytics on heterogeneous devices and (2) DiAlM, a
                 distributed analytics management service that leverages
                 cloud-edge evolving topologies. In the demonstration,
                 based on a real manufacturing use case, we walk through
                 a situation where operators supervise manufacturing
                 equipment through global analytics, and drill down into
                 alarm cases on the factory floor by locally inspecting
                 the data generated by the manufacturing equipment.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chirkova:2013:BUW,
  author =       "Rada Chirkova and Jun Yang",
  title =        "Big and useful: what's in the data for me?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1390--1391",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bartos:2013:UIA,
  author =       "Tom{\'a}{\v{s}} Barto{\v{s}}",
  title =        "Universal indexing of arbitrary similarity models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1392--1397",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The increasing amount of available unstructured
                 content together with the growing number of large
                 nonrelational databases put more emphasis on the
                 content-based retrieval and precisely on the area of
                 similarity searching. Although there exist several
                 indexing methods for efficient querying, not all of
                 them are best-suited for arbitrary similarity models.
                 Having a metric space, we can easily apply metric
                 access methods but for nonmetric models which typically
                 better describe similarities between generally
                 unstructured objects the situation is a little bit more
                 complicated. To address this challenge, we introduce
                 SIMDEX, the universal framework that is capable of
                 finding alternative indexing methods that will serve
                 for efficient yet effective similarity searching for
                 any similarity model. Using trivial or more advanced
                 methods for the incremental exploration of possible
                 indexing techniques, we are able to find alternative
                 methods to the widely used metric space model paradigm.
                 Through experimental evaluations, we validate our
                 approach and show how it outperforms the known indexing
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bress:2013:WIT,
  author =       "Sebastian Bre{\ss} and Gunter Saake",
  title =        "Why it is time for a {HyPE}: a hybrid query processing
                 engine for efficient {GPU} coprocessing in {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1398--1403",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "GPU acceleration is a promising approach to speed up
                 query processing of database systems by using low cost
                 graphic processors as coprocessors. Two major trends
                 have emerged in this area: (1) The development of
                 frameworks for scheduling tasks in heterogeneous
                 CPU/GPU platforms, which is mainly in the context of
                 coprocessing for applications and does not consider
                 specifics of database-query processing and
                 optimization. (2) The acceleration of database
                 operations using efficient GPU algorithms, which
                 typically cannot be applied easily on other database
                 systems, because of their analytical-algorithm-specific
                 cost models. One major challenge is how to combine
                 traditional database query processing with GPU
                 coprocessing techniques and efficient database
                 operation scheduling in a GPU-aware query optimizer. In
                 this thesis, we develop a hybrid query processing
                 engine, which extends the traditional physical
                 optimization process to generate hybrid query plans and
                 to perform a cost-based optimization in a way that the
                 advantages of CPUs and GPUs are combined. Furthermore,
                 we aim at a portable solution between different
                 GPU-accelerated database management systems to maximize
                 applicability. Preliminary results indicate great
                 potential.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mahdiraji:2013:DSU,
  author =       "Alireza Rezaei Mahdiraji and Peter Baumann",
  title =        "Database support for unstructured meshes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1404--1409",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Despite ubiquitous usage of unstructured mesh in many
                 application domains (e.g., computer aided design,
                 scientific simulation, climate modeling, etc.), there
                 is no specialized mesh database which supports storing
                 and querying such data structures. Existing mesh
                 libraries use file-based APIs which do not support
                 declarative querying and are difficult to maintain. A
                 mesh database can benefit these domains in several ways
                 such as: declarative query language, ease of
                 maintenance, query optimization, etc. In this thesis
                 work, the core idea is to have a very general model
                 which can represent objects from different domains and
                 specialize it to smaller object classes using
                 combinatorial constraints. We propose the Incidence
                 multi-Graph Complex (ImG-Complex) data model for
                 storing combinatorial aspect of meshes in a database.
                 We extend incidence graph (IG) representation with
                 multi-incidence information (ImG) to represent a class
                 of objects which we call ImG-Complexes. ImG-Complex can
                 support a wide range of application domains. We
                 introduce optional and application-specific constraints
                 to restrain the general ImG model to specific object
                 classes or specific geometric representations. The
                 constraints check validity of meshes based on the
                 properties of the modeled object class. Finally, we
                 show how graph databases can be utilized and reused to
                 query some combinatorial mesh queries based on the
                 (possibly constrained) ImG model. In particular, we
                 show the strengths and limitations of a graph-only
                 query language in expressing combinatorial mesh
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Madaan:2013:DSM,
  author =       "Aastha Madaan and Subhash Bhalla",
  title =        "Domain specific multistage query language for medical
                 document repositories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1410--1415",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Vast amount of medical information is increasingly
                 available on the Web. As a result, seeking medical
                 information through queries is gaining importance in
                 the medical domain. The existing keyword-based search
                 engines such as Google, Yahoo fail to suffice the needs
                 of the health-care workers (who are well-versed with
                 the domain knowledge required for querying) using these
                 they often face results which are irrelevant and not
                 useful for their tasks. In this paper, we present the
                 need and the challenges for a user-level,
                 domain-specific query language for the specialized
                 document repositories of the medical domain. This topic
                 has not been sufficiently addressed by the existing
                 approaches including SQL-like query languages or
                 general-purpose keyword-based search engines and
                 document-level indexing based search. We aim to bridge
                 the gap between information needs of the
                 skilled/semi-skilled domain users and the query
                 capability provided by the query language. Overcoming
                 such a challenge can facilitate effective use of large
                 volume of information on the Web (and in the electronic
                 health records (EHRs) repositories).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Taxidou:2013:RAI,
  author =       "Io Taxidou and Peter Fischer",
  title =        "Realtime analysis of information diffusion in social
                 media",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1416--1421",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The goal of this thesis is to investigate real-time
                 analysis methods on social media with a focus on
                 information diffusion. From a conceptual point of view,
                 we are interested both in the structural, sociological
                 and temporal aspects of information diffusion in social
                 media with a twist on the real time factor of what is
                 happening right now. From a technical side, the sheer
                 size of current social media services (100's of
                 millions of users) and the large amount of data
                 produced by these users renders conventional approaches
                 for these costly analyses impossible. For that, we need
                 to go beyond the state-of-the-art infrastructure for
                 data-intensive computation. Our high level goal is to
                 investigate how information diffuses in real time on
                 the underlying social network and the role of different
                 users in the propagation process. We plan to implement
                 these analyses with full and partially missing datasets
                 and compare the cost and quality of both approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bonomi:2013:MFP,
  author =       "Luca Bonomi and Li Xiong",
  title =        "Mining frequent patterns with differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1422--1427",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The mining of frequent patterns is a fundamental
                 component in many data mining tasks. A considerable
                 amount of research on this problem has led to a wide
                 series of efficient and scalable algorithms for mining
                 frequent patterns. However, releasing these patterns is
                 posing concerns on the privacy of the users
                 participating in the data. Indeed the information from
                 the patterns can be linked with a large amount of data
                 available from other sources creating opportunities for
                 adversaries to break the individual privacy of the
                 users and disclose sensitive information. In this
                 proposal, we study the mining of frequent patterns in a
                 privacy preserving setting. We first investigate the
                 difference between sequential and itemset patterns, and
                 second we extend the definition of patterns by
                 considering the absence and presence of noise in the
                 data. This leads us in distinguishing the patterns
                 between exact and noisy. For exact patterns, we
                 describe two novel mining techniques that we previously
                 developed. The first approach has been applied in a
                 privacy preserving record linkage setting, where our
                 solution is used to mine frequent patterns which are
                 employed in a secure transformation procedure to link
                 records that are similar. The second approach improves
                 the mining utility results using a two-phase strategy
                 which allows to effectively mine frequent substrings as
                 well as prefixes patterns. For noisy patterns, first we
                 formally define the patterns according to the type of
                 noise and second we provide a set of potential
                 applications that require the mining of these patterns.
                 We conclude the paper by stating the challenges in this
                 new setting and possible future research directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hoppe:2013:AOB,
  author =       "Anett Hoppe and C. Nicolle and A. Roxin",
  title =        "Automatic ontology-based user profile learning from
                 heterogeneous {Web} resources in a big data context",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1428--1433",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Web has developed to the biggest source of
                 information and entertainment in the world. By its
                 size, its adaptability and flexibility, it challenged
                 our current paradigms on information sharing in several
                 areas. By offering everybody the opportunity to release
                 own contents in a fast and cheap way, the Web already
                 led to a revolution of the traditional publishing world
                 and just now, it commences to change the perspective on
                 advertisements. With the possibility to adapt the
                 contents displayed on a page dynamically based on the
                 viewer's context, campaigns launched to target rough
                 customer groups will become an element of the past.
                 However, this new ecosystem, that relates
                 advertisements with the user, heavily relies on the
                 quality of the underlying user profile. This profile
                 has to be able to model any combination of user
                 characteristics, the relations between its composing
                 elements and the uncertainty that stems from the
                 automated processing of real-world data. The work at
                 hand describes the beginnings of a PhD project that
                 aims to tackle those issues using a combination of data
                 analysis, ontology engineering and processing of big
                 data resources provided by an industrial partner. The
                 final goal is to automatically construct and populate a
                 profile ontology for each user identified by the
                 system. This allows to associate these users to
                 high-value audience segments in order to drive digital
                 marketing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dey:2013:STA,
  author =       "Akon Dey and Alan Fekete and Uwe R{\"o}hm",
  title =        "Scalable transactions across heterogeneous {NoSQL}
                 key--value data stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1434--1439",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many cloud systems provide data stores with limited
                 features, especially they may not provide transactions,
                 or else restrict transactions to a single item. We
                 propose an approach that gives multi-item transactions
                 across heterogeneous data stores, using only a minimal
                 set of features from each store such as single item
                 consistency, conditional update, and the ability to
                 include extra metadata within a value. We offer a
                 client-coordinated transaction protocol that does not
                 need a central coordinating infrastructure. A prototype
                 implementation has been built as a Java library and
                 measured with an extension of YCSB benchmark to
                 exercise multi-item transactions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ngo:2013:GUS,
  author =       "Nhung Ngo and Enrico Franconi",
  title =        "Getting unique solution in data exchange",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1440--1443",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A schema mapping is a high-level specification in
                 which the relationship between two database schemas is
                 described. In data exchange, schema mappings are
                 one-way mappings that describe which data can be
                 brought from source data to target data. Therefore,
                 given a source instance and a mapping, there might be
                 more than one valid target instance. This fact causes
                 many problems in query answering over target data for
                 non-conjunctive queries. To make query answering
                 feasible for all queries, we focus on a methodology for
                 extending the original schema mapping to guarantee the
                 uniqueness of target instance corresponding to a source
                 instance. To this end, we introduce a theoretical
                 framework where the problem is transformed to an
                 abduction problem, namely, definability abduction. We
                 apply the framework to relational data exchange setting
                 and solve the problem by pointing out minimal solutions
                 according to a specific semantic minimality
                 criterion.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kaufmann:2013:SPT,
  author =       "Martin Kaufmann and Donald Kossmann",
  title =        "Storing and processing temporal data in a main memory
                 column store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1444--1449",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Managing and accessing temporal data is of increasing
                 importance in industry. So far, most companies model
                 the time dimension on the application layer rather than
                 pushing down the operators to the database, which leads
                 to a significant performance overhead. The goal of this
                 PhD thesis is to develop a native support of temporal
                 features for SAP HANA, which is a commercial in-memory
                 column store database system. We investigate different
                 alternatives to store temporal data physically and
                 analyze the trade-offs arising from different memory
                 layouts which cluster the data either by time or by
                 space dimension. Taking into account the underlying
                 physical representation, different temporal operators
                 such as temporal aggregation, time travel and temporal
                 join have to be executed efficiently. We present a
                 novel data structure called Timeline Index and
                 algorithms based on this index, which have a very
                 competitive performance for all temporal operators
                 beating existing best-of-breed approaches by factors,
                 sometimes even by orders of magnitude. The results of
                 this thesis are currently being integrated into HANA,
                 with the goal of being shipped to the customers as a
                 productive release within the next few months.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kozak:2013:ESS,
  author =       "Stepan Kozak and Pavel Zezula",
  title =        "Efficiency and security in similarity cloud services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1450--1455",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With growing popularity of cloud services, the trend
                 in the industry is to outsource the data to a 3rd party
                 system that provides searching in the data as a
                 service. This approach naturally brings privacy
                 concerns about the (potentially sensitive) data.
                 Recently, quite extensive research of outsourcing
                 classic exact-match or keyword search has been done.
                 However, not much attention has been paid to the
                 outsourcing of the similarity search, which becomes
                 more and more important in information retrieval
                 applications. In this work, we propose to the research
                 community a model of outsourcing similarity search to
                 the cloud environment (so called similarity cloud). We
                 establish privacy and efficiency requirements to be
                 laid down for the similarity cloud with an emphasis on
                 practical use of the system in real applications; this
                 requirement list can be used as a general guideline for
                 practical system analysis and we use it to analyze
                 current existing approaches. We propose two new
                 similarity indexes that ensure data privacy and thus
                 are suitable for search systems outsourced in a cloud.
                 The balance of the first proposed technique EM-Index is
                 more on the efficiency side while the other (DSH Index)
                 shifts this balance more to the privacy side.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sellam:2013:FCD,
  author =       "Thibault Sellam and Martin Kersten",
  title =        "Fast cartography for data explorers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "12",
  pages =        "1456--1461",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:00 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Exploration is the act of investigating unknown
                 regions. An analyst exploring a database cannot, by
                 definition, compose the right query or use the
                 appropriate data mining algorithm. However, current
                 data management tools cannot operate without well
                 defined instructions. Therefore, browsing an unknown
                 database can be a very tedious process. Our project,
                 Atlas, is an attempt to circumvent this problem. Atlas
                 is an active DBMS front-end, designed for database
                 exploration. It generates and ranks several data maps
                 from a user query. A data map is a small set of
                 database queries (less than a dozen), in which each
                 query describes an interesting region of the database.
                 The user can pick one and submit it for further
                 exploration. In order to support interaction, the
                 system should operate in quasi-real time, possibly at
                 the cost of precision, and require as little input
                 parameters as possible. We draft a framework to
                 generate such data maps, and introduce several short-
                 to long-term research problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Simoes:2013:WSP,
  author =       "Gon{\c{c}}alo Sim{\~o}es and Helena Galhardas and Luis
                 Gravano",
  title =        "When speed has a price: fast information extraction
                 using approximate algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1462--1473",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A wealth of information produced by individuals and
                 organizations is expressed in natural language text.
                 This is a problem since text lacks the explicit
                 structure that is necessary to support rich querying
                 and analysis. Information extraction systems are
                 sophisticated software tools to discover structured
                 information in natural language text. Unfortunately,
                 information extraction is a challenging and
                 time-consuming task. In this paper, we address the
                 limitations of state-of-the-art systems for the
                 optimization of information extraction programs, with
                 the objective of producing efficient extraction
                 executions. Our solution relies on exploiting a wide
                 range of optimization opportunities. For efficiency, we
                 consider a wide spectrum of execution plans, including
                 approximate plans whose results differ in their
                 precision and recall. Our optimizer accounts for these
                 characteristics of the competing execution plans, and
                 uses accurate predictors of their extraction time,
                 recall, and precision. We demonstrate the efficiency
                 and effectiveness of our optimizer through a
                 large-scale experimental evaluation over real-world
                 datasets and multiple extraction tasks and
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chasseur:2013:DES,
  author =       "Craig Chasseur and Jignesh M. Patel",
  title =        "Design and evaluation of storage organizations for
                 read-optimized main memory databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1474--1485",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Existing main memory data processing systems employ a
                 variety of storage organizations and make a number of
                 storage-related design choices. The focus of this paper
                 is on systematically evaluating a number of these key
                 storage design choices for main memory analytical (i.e.
                 read-optimized) database settings. Our evaluation
                 produces a number of key insights: First, it is always
                 beneficial to organize data into self-contained memory
                 blocks rather than large files. Second, both
                 column-stores and row-stores display performance
                 advantages for different types of queries, and for high
                 performance both should be implemented as options for
                 the tuple-storage layout. Third, cache-sensitive
                 B+-tree indices can play a major role in accelerating
                 query performance, especially when used in a
                 block-oriented organization. Finally, compression can
                 also play a role in accelerating query performance
                 depending on data distribution and query selectivity.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2013:ASA,
  author =       "Luying Chen and Stefano Ortona and Giorgio Orsi and
                 Michael Benedikt",
  title =        "Aggregating semantic annotators",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1486--1497",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A growing number of resources are available for
                 enriching documents with semantic annotations. While
                 originally focused on a few standard classes of
                 annotations, the ecosystem of annotators is now
                 becoming increasingly diverse. Although annotators
                 often have very different vocabularies, with both
                 high-level and specialist concepts, they also have many
                 semantic interconnections. We will show that both the
                 overlap and the diversity in annotator vocabularies
                 motivate the need for semantic annotation integration:
                 middleware that produces a unified annotation on top of
                 diverse semantic annotators. On the one hand, the
                 diversity of vocabulary allows applications to benefit
                 from the much richer vocabulary available in an
                 integrated vocabulary. On the other hand, we present
                 evidence that the most widely-used annotators on the
                 web suffer from serious accuracy deficiencies: the
                 overlap in vocabularies from individual annotators
                 allows an integrated annotator to boost accuracy by
                 exploiting inter-annotator agreement and disagreement.
                 The integration of semantic annotations leads to new
                 challenges, both compared to usual data integration
                 scenarios and to standard aggregation of machine
                 learning tools. We overview an approach to these
                 challenges that performs ontology-aware aggregation. We
                 introduce an approach that requires no training data,
                 making use of ideas from database repair. We
                 experimentally compare this with a supervised approach,
                 which adapts maximal entropy Markov models to the
                 setting of ontology-based annotations. We further
                 experimentally compare both these approaches with
                 respect to ontology-unaware supervised approaches, and
                 to individual annotators.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chu:2013:DDC,
  author =       "Xu Chu and Ihab F. Ilyas and Paolo Papotti",
  title =        "Discovering denial constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1498--1509",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Integrity constraints (ICs) provide a valuable tool
                 for enforcing correct application semantics. However,
                 designing ICs requires experts and time. Proposals for
                 automatic discovery have been made for some formalisms,
                 such as functional dependencies and their extension
                 conditional functional dependencies. Unfortunately,
                 these dependencies cannot express many common business
                 rules. For example, an American citizen cannot have
                 lower salary and higher tax rate than another citizen
                 in the same state. In this paper, we tackle the
                 challenges of discovering dependencies in a more
                 expressive integrity constraint language, namely Denial
                 Constraints (DCs). DCs are expressive enough to
                 overcome the limits of previous languages and, at the
                 same time, have enough structure to allow efficient
                 discovery and application in several scenarios. We lay
                 out theoretical and practical foundations for DCs,
                 including a set of sound inference rules and a linear
                 algorithm for implication testing. We then develop an
                 efficient instance-driven DC discovery algorithm and
                 propose a novel scoring function to rank DCs for user
                 validation. Using real-world and synthetic datasets, we
                 experimentally evaluate scalability and effectiveness
                 of our solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2013:DTK,
  author =       "Wenfei Fan and Xin Wang and Yinghui Wu",
  title =        "Diversified top-$k$ graph pattern matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1510--1521",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph pattern matching has been widely used in e.g.,
                 social data analysis. A number of matching algorithms
                 have been developed that, given a graph pattern $Q$ and
                 a graph $G$, compute the set $ M(Q, G) $ of matches of
                 $Q$ in $G$. However, these algorithms often return an
                 excessive number of matches, and are expensive on large
                 real-life social graphs. Moreover, in practice many
                 social queries are to find matches of a specific
                 pattern node, rather than the entire $ M(Q, G) $. This
                 paper studies top-$k$ graph pattern matching. (1) We
                 revise graph pattern matching defined in terms of
                 simulation, by supporting a designated output node
                 $ u_o $. Given $G$ and $Q$, it is to find those nodes
                 in $ M(Q, G) $ that match $ u_o $, instead of the
                 large set $ M(Q, G) $. (2) We study two classes of
                 functions for ranking the matches: relevance functions
                 $ \delta_r() $ based on, e.g., social impact, and
                 distance functions $ \delta_d() $ to cover diverse
                 elements. (3) We develop two algorithms for computing
                 top-$k$ matches of $ u_o $ based on $ \delta_r() $,
                 with the early termination property, i.e., they find
                 top-$k$ matches without computing the entire
                 $ M(Q, G) $. (4) We also study diversified top-$k$
                 matching, a bi-criteria optimization problem based on
                 both $ \delta_r() $ and $ \delta_d() $. We show that
                 its decision problem is
                 NP-complete. Nonetheless, we provide an approximation
                 algorithm with performance guarantees and a heuristic
                 one with the early termination property. (5) Using
                 real-life and synthetic data, we experimentally verify
                 that our (diversified) top-$k$ matching algorithms are
                 effective, and outperform traditional matching
                 algorithms in efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rao:2013:BNF,
  author =       "Weixiong Rao and Lei Chen and Pan Hui and Sasu
                 Tarkoma",
  title =        "{Bitlist}: new full-text index for low space cost and
                 efficient keyword search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1522--1533",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nowadays Web search engines are experiencing
                 significant performance challenges caused by a huge
                 amount of Web pages and increasingly larger number of
                 Web users. The key issue for addressing these
                 challenges is to design a compact structure which can
                 index Web documents with low space and meanwhile
                 process keyword search very fast. Unfortunately, the
                 current solutions typically separate the space
                 optimization from the search improvement. As a result,
                 such solutions either save space yet with search
                 inefficiency, or allow fast keyword search but with
                 huge space requirement. In this paper, to address the
                 challenges, we propose a novel structure bitlist with
                 both low space requirement and supporting fast keyword
                 search. Specifically, based on a simple and yet very
                 efficient encoding scheme, bitlist uses a single number
                 to encode a set of integer document IDs for low space,
                 and adopts fast bitwise operations for very efficient
                 boolean-based keyword search. Our extensive
                 experimental results on real and synthetic data sets
                 verify that bitlist outperforms the recent proposed
                 solution, inverted list compression [23, 22] by
                 spending 36.71\% less space and 61.91\% faster
                 processing time, and achieves comparable running time
                 as [8] but with significantly lower space.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wandelt:2013:RSS,
  author =       "Sebastian Wandelt and Johannes Starlinger and Marc Bux
                 and Ulf Leser",
  title =        "{RCSI}: scalable similarity search in thousand(s) of
                 genomes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1534--1545",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Until recently, genomics has concentrated on comparing
                 sequences between species. However, due to the sharply
                 falling cost of sequencing technology, studies of
                 populations of individuals of the same species are now
                 feasible and promise advances in areas such as
                 personalized medicine and treatment of genetic
                 diseases. A core operation in such studies is read
                 mapping, i.e., finding all parts of a set of genomes
                 which are within edit distance $k$ to a given query
                 sequence ($k$-approximate search). To achieve
                 sufficient speed, current algorithms solve this problem
                 only for one to-be-searched genome and compute only
                 approximate solutions, i.e., they miss some
                 $k$-approximate occurrences. We present RCSI,
                 Referentially
                 Compressed Search Index, which scales to a thousand
                 genomes and computes the exact answer. It exploits the
                 fact that genomes of different individuals of the same
                 species are highly similar by first compressing the
                 to-be-searched genomes with respect to a reference
                 genome. Given a query, RCSI then searches the reference
                 and all genome-specific individual differences. We
                 propose efficient data structures for representing
                 compressed genomes and present algorithms for scalable
                 compression and similarity search. We evaluate our
                 algorithms on a set of 1092 human genomes, which amount
                 to approx. 3 TB of raw data. RCSI compresses this set
                 by a ratio of 450:1 (26:1 including the search index)
                 and answers similarity queries on a mid-class server in
                 15 ms on average even for comparably large error
                 thresholds, thereby significantly outperforming other
                 methods. Furthermore, we present a fast and adaptive
                 heuristic for choosing the best reference sequence for
                 referential compression, a problem that was never
                 studied before at this scale.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tao:2013:AMS,
  author =       "Yufei Tao and Xiaocheng Hu and Dong-Wan Choi and
                 Chin-Wan Chung",
  title =        "Approximate {MaxRS} in spatial databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1546--1557",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the maximizing range sum (MaxRS) problem, given (i)
                 a set $P$ of $2$D points each of which is associated
                 with a positive weight, and (ii) a rectangle $r$ of
                 specific extents, we need to decide where to place $r$
                 in order to maximize the covered weight of $r$ --- that
                 is, the total weight of the data points covered by $r$.
                 Algorithms solving the problem exactly entail expensive
                 CPU or I/O cost. In practice, exact answers are often
                 not compulsory in a MaxRS application, where slight
                 imprecision can often be comfortably tolerated,
                 provided that approximate answers can be computed
                 considerably faster. Motivated by this, the present
                 paper studies the $ (1 - \epsilon) $-approximate MaxRS
                 problem, which admits the same inputs as MaxRS, but
                 aims instead to return a rectangle whose covered weight
                 is at least $ (1 - \epsilon) m^* $, where $ m^* $ is
                 the optimal covered weight, and $ \epsilon $ can be an
                 arbitrarily small constant between $0$ and $1$. We
                 present fast algorithms that settle this problem with
                 strong theoretical guarantees.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kimelfeld:2013:MTD,
  author =       "Benny Kimelfeld and Jan Vondr{\'a}k and David P.
                 Woodruff",
  title =        "Multi-tuple deletion propagation: approximations and
                 complexity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1558--1569",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper studies the computational complexity of the
                 classic problem of deletion propagation in a relational
                 database, where tuples are deleted from the base
                 relations in order to realize a desired deletion of
                 tuples from the view. Such an operation may result in a
                 (sometimes unavoidable) side effect: deletion of
                 additional tuples from the view, besides the
                 intentionally deleted ones. The goal is to minimize the
                 side effect. The complexity of this problem has been
                 well studied in the case where only a single tuple is
                 deleted from the view. However, only little is known
                 within the more realistic scenario of multi-tuple
                 deletion, which is the topic of this paper. The class
                 of conjunctive queries (CQs) is among the most well
                 studied in the literature, and we focus here on views
                 defined by CQs that are self-join free (sjf-CQs). Our
                 main result is a trichotomy in complexity, classifying
                 all sjf-CQs into three categories: those for which the
                 problem is in polynomial time, those for which the
                 problem is NP-hard but polynomial-time approximable (by
                 a constant-factor), and those for which even an
                 approximation (by any factor) is NP-hard to obtain. A
                 corollary of this trichotomy is a dichotomy in the
                 complexity of deciding whether a side-effect-free
                 solution exists, in the multi-tuple case. We further
                 extend the full classification to accommodate the
                 presence of a constant upper bound on the number of
                 view tuples to delete, and the presence of functional
                 dependencies. Finally, we establish (positive and
                 negative) complexity results on approximability for the
                 dual problem of maximizing the number of view tuples
                 surviving (rather than minimizing the side effect
                 incurred in) the deletion propagation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2013:SDF,
  author =       "Badrish Chandramouli and Suman Nath and Wenchao Zhou",
  title =        "Supporting distributed feed-following apps over edge
                 devices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1570--1581",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In feed-following applications such as Twitter and
                 Facebook, users (consumers) follow a large number of
                 other users (producers) to get personalized feeds,
                 generated by blending producers' feeds. With the
                 proliferation of Cloud-connected smart edge devices
                 such as smartphones, producers and consumers of many
                 feed-following applications reside on edge devices and
                 the Cloud. An important design goal of such
                 applications is to minimize communication (and energy)
                 overhead of edge devices. In this paper, we abstract
                 distributed feed-following applications as a view
                 maintenance problem, with the goal of optimally placing
                 the views on edge devices and in the Cloud to minimize
                 communication overhead between edge devices and the
                 Cloud. The view placement problem for general network
                 topology is NP Hard; however, we show that for the
                 special case of Cloud-edge topology, locally optimal
                 solutions yield a globally optimal view placement
                 solution. Based on this powerful result, we propose
                 view placement algorithms that are highly efficient,
                 yet provably minimize global network cost. Compared to
                 existing works on feed-following applications, our
                 algorithms are more general --- they support views with
                 selection, projection, correlation (join) and arbitrary
                 black-box operators, and can even refer to other views.
                 We have implemented our algorithms within a distributed
                 feed-following architecture over real smartphones and
                 the Cloud. Experiments over real datasets indicate that
                 our algorithms are highly scalable and
                 orders-of-magnitude more efficient than existing
                 strategies for optimal placement. Further, our results
                 show that optimal placements generated by our
                 algorithms are often several factors better than
                 simpler schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Thirumuruganathan:2013:RDW,
  author =       "Saravanan Thirumuruganathan and Nan Zhang and Gautam
                 Das",
  title =        "Rank discovery from web databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1582--1593",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many web databases are only accessible through a
                 proprietary search interface which allows users to form
                 a query by entering the desired values for a few
                 attributes. After receiving a query, the system returns
                 the top-$k$ matching tuples according to a
                 pre-determined ranking function. Since the rank of a
                 tuple largely determines the attention it receives from
                 website users, ranking information for any tuple ---
                 not just the top-ranked ones --- is often of
                 significant interest to third parties such as sellers,
                 customers, market researchers and investors. In this
                 paper, we define a novel problem of rank discovery over
                 hidden web databases. We introduce a taxonomy of
                 ranking functions, and show that different types of
                 ranking functions require fundamentally different
                 approaches for rank discovery. Our technical
                 contributions include principled and efficient
                 randomized algorithms for estimating the rank of a
                 given tuple, as well as negative results which
                 demonstrate the inefficiency of any deterministic
                 algorithm. We show extensive experimental results over
                 real-world databases, including an online experiment at
                 Amazon.com, which illustrates the effectiveness of our
                 proposed techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rekatsinas:2013:SPS,
  author =       "Theodoros Rekatsinas and Amol Deshpande and Ashwin
                 Machanavajjhala",
  title =        "{SPARSI}: partitioning sensitive data amongst multiple
                 adversaries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1594--1605",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present SPARSI, a novel theoretical framework for
                 partitioning sensitive data across multiple
                 non-colluding adversaries. Most work in privacy-aware
                 data sharing has considered disclosing summaries where
                 the aggregate information about the data is preserved,
                 but sensitive user information is protected.
                 Nonetheless, there are applications, including online
                 advertising, cloud computing and crowdsourcing markets,
                 where detailed and fine-grained user data must be
                 disclosed. We consider a new data sharing paradigm and
                 introduce the problem of privacy-aware data
                 partitioning, where a sensitive dataset must be
                 partitioned among $k$ untrusted parties (adversaries).
                 The goal is to maximize the utility derived by
                 partitioning and distributing the dataset, while
                 minimizing the total amount of sensitive information
                 disclosed. The data should be distributed so that an
                 adversary, without colluding with other adversaries,
                 cannot draw additional inferences about the private
                 information, by linking together multiple pieces of
                 information released to her. The assumption of no
                 collusion is both reasonable and necessary in the above
                 application domains that require release of private
                 user information. SPARSI enables us to formally define
                 privacy-aware data partitioning using the notion of
                 sensitive properties for modeling private information
                 and a hypergraph representation for describing the
                 interdependencies between data entries and private
                 information. We show that solving privacy-aware
                 partitioning is, in general, NP-hard, but for specific
                 information disclosure functions, good approximate
                 solutions can be found using relaxation techniques.
                 Finally, we present a local search algorithm applicable
                 to generic information disclosure functions. We conduct
                 a rigorous performance evaluation with real-world and
                 synthetic datasets that illustrates the effectiveness
                 of SPARSI at partitioning sensitive data while
                 minimizing disclosure.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deng:2013:SCC,
  author =       "Dong Deng and Yu Jiang and Guoliang Li and Jian Li and
                 Cong Yu",
  title =        "Scalable column concept determination for {Web} tables
                 using large knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1606--1617",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tabular data on the Web has become a rich source of
                 structured data that is useful for ordinary users to
                 explore. Due to its potential, tables on the Web have
                 recently attracted a number of studies with the goals
                 of understanding the semantics of those Web tables and
                 providing effective search and exploration mechanisms
                 over them. An important part of table understanding and
                 search is column concept determination, i.e.,
                 identifying the most appropriate concepts associated
                 with the columns of the tables. The problem becomes
                 especially challenging with the availability of
                 increasingly rich knowledge bases that contain hundreds
                 of millions of entities. In this paper, we focus on an
                 important instantiation of the column concept
                 determination problem, namely, the concepts of a column
                 are determined by fuzzy matching its cell values to the
                 entities within a large knowledge base. We provide an
                 efficient and scalable MapReduce-based solution that is
                 scalable to both the number of tables and the size of
                 the knowledge base and propose two novel techniques:
                 knowledge concept aggregation and knowledge entity
                 partition. We prove that both the problem of finding
                 the optimal aggregation strategy and that of finding
                 the optimal partition strategy are NP-hard, and propose
                 efficient heuristic techniques by leveraging the
                 hierarchy of the knowledge base. Experimental results
                 on real-world datasets show that our method achieves
                 high annotation quality and performance, and scales
                 well.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2013:TKS,
  author =       "Xin Huang and Hong Cheng and Rong-Hua Li and Lu Qin
                 and Jeffrey Xu Yu",
  title =        "Top-$k$ structural diversity search in large
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1618--1629",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Social contagion depicts a process of information
                 (e.g., fads, opinions, news) diffusion in the online
                 social networks. A recent study reports that in a
                 social contagion process the probability of contagion
                 is tightly controlled by the number of connected
                 components in an individual's neighborhood. Such a
                 number is termed structural diversity of an individual
                 and it is shown to be a key predictor in the social
                 contagion process. Based on this, a fundamental issue
                 in a social network is to find top-$k$ users with the
                 highest structural diversities. In this paper, we, for
                 the first time, study the top-$k$ structural diversity
                 search problem in a large network. Specifically, we
                 develop an effective upper bound of structural
                 diversity for pruning the search space. The upper bound
                 can be incrementally refined in the search process.
                 Based on such upper bound, we propose an efficient
                 framework for top-$k$ structural diversity search. To
                 further speed up the structural diversity evaluation in
                 the search process, several carefully devised heuristic
                 search strategies are proposed. Extensive experimental
                 studies are conducted in 13 real-world large networks,
                 and the results demonstrate the efficiency and
                 effectiveness of the proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cavalieri:2013:SCX,
  author =       "Federico Cavalieri and Alessandro Solimando and
                 Giovanna Guerrini",
  title =        "Synthetising changes in {XML} documents as {PULs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "13",
  pages =        "1630--1641",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:09 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ability of efficiently detecting changes in XML
                 documents is crucial in many application contexts. If
                 such changes are represented as XQuery Update Pending
                 Update Lists (PULs), they can then be applied on
                 documents using XQuery Update engines, and document
                 management can take advantage of existing composition,
                 inversion, reconciliation approaches developed in the
                 update processing context. The paper presents an XML
                 edit-script generator with the unique characteristic of
                 using PULs as edit-script language and improving the
                 state of the art from both the performance and the
                 generated edit-script quality perspectives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2013:PQR,
  author =       "Lei Zhang and Thanh Tran and Achim Rettinger",
  title =        "Probabilistic query rewriting for efficient and
                 effective keyword search on graph data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1642--1653",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of rewriting keyword search queries on
                 graph data has been studied recently, where the main
                 goal is to clean user queries by rewriting keywords as
                 valid tokens appearing in the data and grouping them
                 into meaningful segments. The main solution to this
                 problem employs heuristics for ranking query rewrites
                 and a dynamic programming algorithm for computing them.
                 Based on a broader set of queries defined by an
                 existing benchmark, we show that the use of these
                 heuristics does not yield good results. We propose a
                 novel probabilistic framework, which enables the
                 optimality of a query rewrite to be estimated in a more
                 principled way. We show that our approach outperforms
                 existing work in terms of effectiveness and efficiency
                 of query rewriting. More importantly, we provide the
                 first results indicating query rewriting can indeed
                 improve overall keyword search runtime performance and
                 result quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schaler:2013:QBH,
  author =       "Martin Sch{\"a}ler and Alexander Grebhahn and Reimar
                 Schr{\"o}ter and Sandro Schulze and Veit K{\"o}ppen and
                 Gunter Saake",
  title =        "{QuEval}: beyond high-dimensional indexing {\`a} la
                 carte",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1654--1665",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the recent past, the amount of high-dimensional
                 data, such as feature vectors extracted from multimedia
                 data, increased dramatically. A large variety of
                 indexes have been proposed to store and access such
                 data efficiently. However, due to specific requirements
                 of a certain use case, choosing an adequate index
                 structure is a complex and time-consuming task. This
                 may be due to engineering challenges or open research
                 questions. To overcome this limitation, we present
                 QuEval, an open-source framework that can be flexibly
                 extended w.r.t. index structures, distance metrics, and
                 data sets. QuEval provides a unified environment for a
                 sound evaluation of different indexes, for instance, to
                 support tuning of indexes. In an empirical evaluation,
                 we show how to apply our framework, motivate benefits,
                 and demonstrate analysis possibilities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2013:DLL,
  author =       "Yuhong Li and Leong Hou U. and Man Lung Yiu and Zhiguo
                 Gong",
  title =        "Discovering longest-lasting correlation in sequence
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1666--1677",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Most existing work on sequence databases use
                 correlation (e.g., Euclidean distance and Pearson
                 correlation) as a core function for various analytical
                 tasks. Typically, it requires users to set a length for
                 the similarity queries. However, there is no steady way
                 to define the proper length on different application
                 needs. In this work we focus on discovering
                 longest-lasting highly correlated subsequences in
                 sequence databases, which is particularly useful in
                 helping those analyses without prior knowledge about
                 the query length. Surprisingly, there has been limited
                 work on this problem. A baseline solution is to
                 calculate the correlations for every possible
                 subsequence combination. Obviously, the brute force
                 solution is not scalable for large datasets. In this
                 work we study a space-constrained index that gives a
                 tight correlation bound for subsequences of similar
                 length and offset by intra-object grouping and
                 inter-object grouping techniques. To the best of our
                 knowledge, this is the first index to support
                 normalized distance metric of arbitrary length
                 subsequences. Extensive experimental evaluation on both
                 real and synthetic sequence datasets verifies the
                 efficiency and effectiveness of our proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Popescu:2013:PTP,
  author =       "Adrian Daniel Popescu and Andrey Balmin and Vuk
                 Ercegovac and Anastasia Ailamaki",
  title =        "{PREDIcT}: towards predicting the runtime of large
                 scale iterative analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1678--1689",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine learning algorithms are widely used today for
                 analytical tasks such as data cleaning, data
                 categorization, or data filtering. At the same time,
                 the rise of social media motivates recent uptake in
                 large scale graph processing. Both categories of
                 algorithms are dominated by iterative subtasks, i.e.,
                 processing steps which are executed repetitively until
                 a convergence condition is met. Optimizing cluster
                 resource allocations among multiple workloads of
                 iterative algorithms motivates the need for estimating
                 their runtime, which in turn requires: (i) predicting
                 the number of iterations, and (ii) predicting the
                 processing time of each iteration. As both parameters
                 depend on the characteristics of the dataset and on the
                 convergence function, estimating their values before
                 execution is difficult. This paper proposes PREDIcT, an
                 experimental methodology for predicting the runtime of
                 iterative algorithms. PREDIcT uses sample runs for
                 capturing the algorithm's convergence trend and
                 per-iteration key input features that are well
                 correlated with the actual processing requirements of
                 the complete input dataset. Using this combination of
                 characteristics we predict the runtime of iterative
                 algorithms, including algorithms with very different
                 runtime patterns among subsequent iterations. Our
                 experimental evaluation of multiple algorithms on
                 scale-free graphs shows a relative prediction error of
                 10\%--30\% for predicting runtime, including algorithms
                 with up to $ 100 \times $ runtime variability among
                 consecutive iterations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhao:2013:ERW,
  author =       "Xiaohan Zhao and Adelbert Chang and Atish Das Sarma
                 and Haitao Zheng and Ben Y. Zhao",
  title =        "On the embeddability of random walk distances",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1690--1701",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analysis of large graphs is critical to the ongoing
                 growth of search engines and social networks. One class
                 of queries centers around node affinity, often
                 quantified by random-walk distances between node pairs,
                 including hitting time, commute time, and personalized
                 PageRank (PPR). Despite the potential of these
                 ``metrics,'' they are rarely, if ever, used in
                 practice, largely due to extremely high computational
                 costs. In this paper, we investigate methods to
                 scalably and efficiently compute random-walk distances,
                 by ``embedding'' graphs and distances into points and
                 distances in geometric coordinate spaces. We show that
                 while existing graph coordinate systems (GCS) can
                 accurately estimate shortest path distances, they
                 produce significant errors when embedding random-walk
                 distances. Based on our observations, we propose a new
                 graph embedding system that explicitly accounts for
                 per-node graph properties that affect random walk.
                 Extensive experiments on a range of graphs show that
                 our new approach can accurately estimate both symmetric
                 and asymmetric random-walk distances. Once a graph is
                 embedded, our system can answer queries between any two
                 nodes in 8 microseconds, orders of magnitude faster
                 than existing methods. Finally, we show that our system
                 produces estimates that can replace ground truth in
                 applications with minimal impact on application
                 output.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Muhlbauer:2013:ILM,
  author =       "Tobias M{\"u}hlbauer and Wolf R{\"o}diger and Robert
                 Seilbeck and Angelika Reiser and Alfons Kemper and
                 Thomas Neumann",
  title =        "Instant loading for main memory databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1702--1713",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "eScience and big data analytics applications are
                 facing the challenge of efficiently evaluating complex
                 queries over vast amounts of structured text data
                 archived in network storage solutions. To analyze such
                 data in traditional disk-based database systems, it
                 needs to be bulk loaded, an operation whose performance
                 largely depends on the wire speed of the data source
                 and the speed of the data sink, i.e., the disk. As the
                 speed of network adapters and disks has stagnated in
                 the past, loading has become a major bottleneck. The
                 delays it is causing are now ubiquitous as text formats
                 are a preferred storage format for reasons of
                 portability. But the game has changed: Ever increasing
                 main memory capacities have fostered the development of
                 in-memory database systems and very fast network
                 infrastructures are on the verge of becoming
                 economical. While hardware limitations for fast loading
                 have disappeared, current approaches for main memory
                 databases fail to saturate the now available wire
                 speeds of tens of Gbit/s. With Instant Loading, we
                 contribute a novel CSV loading approach that allows
                 scalable bulk loading at wire speed. This is achieved
                 by optimizing all phases of loading for modern
                 super-scalar multi-core CPUs. Large main memory
                 capacities and Instant Loading thereby facilitate a
                 very efficient data staging processing model consisting
                 of instantaneous load-work-unload cycles across data
                 archives on a single node. Once data is loaded, updates
                 and queries are efficiently processed with the
                 flexibility, security, and high performance of
                 relational main memory databases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alexiou:2013:ARF,
  author =       "Karolina Alexiou and Donald Kossmann and Per-{\AA}ke
                 Larson",
  title =        "Adaptive range filters for cold data: avoiding trips
                 to {Siberia}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1714--1725",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Bloom filters are a great technique to test whether a
                 key is not in a set of keys. This paper presents a
                 novel data structure called ARF. In a nutshell, ARFs
                 are for range queries what Bloom filters are for point
                 queries. That is, an ARF can determine whether a set of
                 keys does not contain any keys that are part of a
                 specific range. This paper describes the principles and
                 methods for efficient implementation of ARFs and
                 presents the results of comprehensive experiments that
                 assess the precision, space, and latency of ARFs.
                 Furthermore, this paper shows how ARFs can be applied
                 to a commercial database system that partitions data
                 into hot and cold regions to optimize queries that
                 involve only hot data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2013:SPA,
  author =       "Badrish Chandramouli and Jonathan Goldstein and Abdul
                 Quamar",
  title =        "Scalable progressive analytics on big data in the
                 {Cloud}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1726--1737",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analytics over the increasing quantity of data stored
                 in the Cloud has become very expensive, particularly
                 due to the pay-as-you-go Cloud computation model. Data
                 scientists typically manually extract samples of
                 increasing data size (progressive samples) using
                 domain-specific sampling strategies for exploratory
                 querying. This provides them with user-control,
                 repeatable semantics, and result provenance. However,
                 such solutions result in tedious workflows that
                 preclude the reuse of work across samples. On the other
                 hand, existing approximate query processing systems
                 report early results, but do not offer the above
                 benefits for complex ad-hoc queries. We propose a new
                 progressive analytics system based on a progress model
                 called Prism that (1) allows users to communicate
                 progressive samples to the system; (2) allows efficient
                 and deterministic query processing over samples; and
                 (3) provides repeatable semantics and provenance to
                 data scientists. We show that one can realize this
                 model for atemporal relational queries using an
                 unmodified temporal streaming engine, by
                 re-interpreting temporal event fields to denote
                 progress. Based on Prism, we build Now!, a progressive
                 data-parallel computation framework for Windows Azure,
                 where progress is understood as a first-class citizen
                 in the framework. Now! works with ``progress-aware
                 reducers'' --- in particular, it works with streaming
                 engines to support progressive SQL over big data.
                 Extensive experiments on Windows Azure with real and
                 synthetic workloads validate the scalability and
                 benefits of Now! and its optimizations, over current
                 solutions for progressive analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ogden:2013:SXQ,
  author =       "Peter Ogden and David Thomas and Peter Pietzuch",
  title =        "Scalable {XML} query processing using parallel
                 pushdown transducers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1738--1749",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In online social networking, network monitoring and
                 financial applications, there is a need to query high
                 rate streams of XML data, but methods for executing
                 individual XPath queries on streaming XML data have not
                 kept pace with multicore CPUs. For data-parallel
                 processing, a single XML stream is typically split into
                 well-formed fragments, which are then processed
                 independently. Such an approach, however, introduces a
                 sequential bottleneck and suffers from low cache
                 locality, limiting its scalability across CPU cores. We
                 describe a data-parallel approach for the processing of
                 streaming XPath queries based on pushdown transducers.
                 Our approach permits XML data to be split into
                 arbitrarily-sized chunks, with each chunk processed by
                 a parallel automaton instance. Since chunks may be
                 malformed, our automata consider all possible starting
                 states for XML elements and build mappings from
                 starting to finishing states. These mappings can be
                 constructed independently for each chunk by different
                 CPU cores. For streaming queries from the XPathMark
                 benchmark, we show a processing throughput of 2.5 GB/s,
                 with near linear scaling up to 64 CPU cores.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Table placement methods in Hadoop clusters: core operations (row
%%% reordering, table partitioning, data packing); ORC File case study.
%%% NOTE(review): entry has no DOI/URL field; confirm against the ACM DL record.
@Article{Huai:2013:UIB,
  author =       "Yin Huai and Siyuan Ma and Rubao Lee and Owen O'Malley
                 and Xiaodong Zhang",
  title =        "Understanding insights into the basic structure and
                 essential issues of table placement methods in
                 clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1750--1761",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A table placement method is a critical component in
                 big data analytics on distributed systems. It
                 determines the way how data values in a two-dimensional
                 table are organized and stored in the underlying
                 cluster. Based on Hadoop computing environments,
                 several table placement methods have been proposed and
                 implemented. However, a comprehensive and systematic
                 study to understand, to compare, and to evaluate
                 different table placement methods has not been done.
                 Thus, it is highly desirable to gain important insights
                 into the basic structure and essential issues of table
                 placement methods in the context of big data processing
                 infrastructures. In this paper, we present such a
                 study. The basic structure of a data placement method
                 consists of three core operations: row reordering,
                 table partitioning, and data packing. All the existing
                 placement methods are formed by these core operations
                 with variations made by the three key factors: (1) the
                 size of a horizontal logical subset of a table (or the
                 size of a row group), (2) the function of mapping
                 columns to column groups, and (3) the function of
                 packing columns or column groups in a row group into
                 physical blocks. We have designed and implemented a
                 benchmarking tool to provide insights into how
                 variations of each factor affect the I/O performance of
                 reading data of a table stored by a table placement
                 method. Based on our results, we give suggested actions
                 to optimize table reading performance. Results from
                 large-scale experiments have also confirmed that our
                 findings are valid for production workloads. Finally,
                 we present ORC File as a case study to show the
                 effectiveness of our findings and suggested actions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Probabilistic optimization framework for interactive query relaxation
%%% when a query returns an empty answer set.
@Article{Mottin:2013:POF,
  author =       "Davide Mottin and Alice Marascu and Senjuti Basu Roy
                 and Gautam Das and Themis Palpanas and Yannis
                 Velegrakis",
  title =        "A probabilistic optimization framework for the
                 empty-answer problem",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1762--1773",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose a principled optimization-based interactive
                 query relaxation framework for queries that return no
                 answers. Given an initial query that returns an empty
                 answer set, our framework dynamically computes and
                 suggests alternative queries with less conditions than
                 those the user has initially requested, in order to
                 help the user arrive at a query with a non-empty
                 answer, or at a query for which no matter how many
                 additional conditions are ignored, the answer will
                 still be empty. Our proposed approach for suggesting
                 query relaxations is driven by a novel probabilistic
                 framework based on optimizing a wide variety of
                 application-dependent objective functions. We describe
                 optimal and approximate solutions of different
                 optimization problems using the framework. We analyze
                 these solutions, experimentally verify their efficiency
                 and effectiveness, and illustrate their advantage over
                 the existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Summary graphs for answer graphs induced by keyword queries: coverage
%%% ratio metric, complexity (ptime to NP-complete), exact and heuristic
%%% summarization algorithms.
@Article{Wu:2013:SAG,
  author =       "Yinghui Wu and Shengqi Yang and Mudhakar Srivatsa and
                 Arun Iyengar and Xifeng Yan",
  title =        "Summarizing answer graphs induced by keyword queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1774--1785",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Keyword search has been popularly used to query graph
                 data. Due to the lack of structure support, a keyword
                 query might generate an excessive number of matches,
                 referred to as ``answer graphs'', that could include
                 different relationships among keywords. An ignored yet
                 important task is to group and summarize answer graphs
                 that share similar structures and contents for better
                 query interpretation and result understanding. This
                 paper studies the summarization problem for the answer
                 graphs induced by a keyword query $Q$. (1) A notion of
                 summary graph is proposed to characterize the
                 summarization of answer graphs. Given $Q$ and a set of
                 answer graphs $G$, a summary graph preserves the
                 relation of the keywords in $Q$ by summarizing the
                 paths connecting the keywords nodes in $G$. (2) A
                 quality metric of summary graphs, called coverage
                 ratio, is developed to measure information loss of
                 summarization. (3) Based on the metric, a set of
                 summarization problems are formulated, which aim to
                 find minimized summary graphs with certain coverage
                 ratio. (a) We show that the complexity of these
                 summarization problems ranges from ptime to
                 NP-complete. (b) We provide exact and heuristic
                 summarization algorithms. (4) Using real-life and
                 synthetic graphs, we experimentally verify the
                 effectiveness and the efficiency of our techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Probabilistic entity-retrieval model (query generation) for keyword
%%% search over structured product databases; ML and MAP estimation from
%%% specifications, reviews, and search logs.
@Article{Duan:2013:SKS,
  author =       "Huizhong Duan and ChengXiang Zhai and Jinxing Cheng
                 and Abhishek Gattani",
  title =        "Supporting keyword search in product database: a
                 probabilistic approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1786--1797",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ability to let users search for products
                 conveniently in product database is critical to the
                 success of e-commerce. Although structured query
                 languages (e.g. SQL) can be used to effectively access
                 the product database, it is very difficult for end
                 users to learn and use. In this paper, we study how to
                 optimize search over structured product entities
                 (represented by specifications) with keyword queries
                 such as ``cheap gaming laptop''. One major difficulty
                 in this problem is the vocabulary gap between the
                 specifications of products in the database and the
                 keywords people use in search queries. To solve the
                 problem, we propose a novel probabilistic entity
                 retrieval model based on query generation, where the
                 entities would be ranked for a given keyword query
                 based on the likelihood that a user who likes an entity
                 would pose the query. Different ways to estimate the
                 model parameters would lead to different variants of
                 ranking functions. We start with simple estimates based
                 on the specifications of entities, and then leverage
                 user reviews and product search logs to improve the
                 estimation. Multiple estimation algorithms are
                 developed based on Maximum Likelihood and Maximum a
                 Posteriori estimators. We evaluate the proposed product
                 entity retrieval models on two newly created product
                 search test collections. The results show that the
                 proposed model significantly outperforms the existing
                 retrieval models, benefiting from the modeling of
                 attribute-level relevance. Despite the focus on product
                 retrieval, the proposed modeling method is general and
                 opens up many new opportunities in analyzing structured
                 entity data with unstructured text data. We show the
                 proposed probabilistic model can be easily adapted for
                 many interesting applications including facet
                 generation and review annotation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Generalized uniform sampling (GUS) algebra for aggregate estimation:
%%% sampling operators commuting with selection/join, confidence intervals.
@Article{Nirkhiwale:2013:SAA,
  author =       "Supriya Nirkhiwale and Alin Dobra and Christopher
                 Jermaine",
  title =        "A sampling algebra for aggregate estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1798--1809",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As of 2005, sampling has been incorporated in all
                 major database systems. While efficient sampling
                 techniques are realizable, determining the accuracy of
                 an estimate obtained from the sample is still an
                 unresolved problem. In this paper, we present a
                 theoretical framework that allows an elegant treatment
                 of the problem. We base our work on generalized uniform
                 sampling (GUS), a class of sampling methods that
                 subsumes a wide variety of sampling techniques. We
                 introduce a key notion of equivalence that allows GUS
                 sampling operators to commute with selection and join,
                 and derivation of confidence intervals. We illustrate
                 the theory through extensive examples and give
                 indications on how to use it to provide meaningful
                 estimates in database systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Temporal-probabilistic database model for cleaning uncertain temporal
%%% facts from information extraction; compared against ILP and MLN
%%% approaches.
@Article{Dylla:2013:TPD,
  author =       "Maximilian Dylla and Iris Miliaraki and Martin
                 Theobald",
  title =        "A temporal-probabilistic database model for
                 information extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1810--1821",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Temporal annotations of facts are a key component both
                 for building a high-accuracy knowledge base and for
                 answering queries over the resulting temporal knowledge
                 base with high precision and recall. In this paper, we
                 present a temporal-probabilistic database model for
                 cleaning uncertain temporal facts obtained from
                 information extraction methods. Specifically, we
                 consider a combination of temporal deduction rules,
                 temporal consistency constraints and probabilistic
                 inference based on the common possible-worlds semantics
                 with data lineage, and we study the theoretical
                 properties of this data model. We further develop a
                 query engine which is capable of scaling to very large
                 temporal knowledge bases, with nearly interactive query
                 response times over millions of uncertain facts and
                 hundreds of thousands of grounded rules. Our
                 experiments over two real-world datasets demonstrate
                 the increased robustness of our approach compared to
                 related techniques based on constraint solving via
                 Integer Linear Programming (ILP) and probabilistic
                 inference via Markov Logic Networks (MLNs). We are also
                 able to show that our runtime performance is more than
                 competitive to current ILP solvers and the fastest
                 available, probabilistic but non-temporal, database
                 engines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Generic top-down join enumeration for hypergraphs, handling complex
%%% join predicates and non-inner joins with branch-and-bound pruning.
@Article{Fender:2013:CSG,
  author =       "Pit Fender and Guido Moerkotte",
  title =        "Counter strike: generic top-down join enumeration for
                 hypergraphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1822--1833",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding the optimal execution order of join operations
                 is a crucial task of today's cost-based query
                 optimizers. There are two approaches to identify the
                 best plan: bottom-up and top-down join enumeration. But
                 only the top-down approach allows for branch-and-bound
                 pruning, which can improve compile time by several
                 orders of magnitude while still preserving optimality.
                 For both optimization strategies, efficient enumeration
                 algorithms have been published. However, there are two
                 severe limitations for the top-down approach: The
                 published algorithms can handle only (1) simple
                 (binary) join predicates and (2) inner joins. Since
                 real queries may contain complex join predicates
                 involving more than two relations, and outer joins as
                 well as other non-inner joins, efficient top-down join
                 enumeration cannot be used in practice yet. We develop
                 a novel top-down join enumeration algorithm that
                 overcomes these two limitations. Furthermore, we show
                 that our new algorithm is competitive when compared to
                 the state of the art in bottom-up processing even
                 without playing out its advantage by making use of its
                 branch-and-bound pruning capabilities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Sort-optimal bulk loading and bulk updates for the Multiversion B-Tree
%%% (MVBT) via weight balancing and buffer trees.
@Article{Achakeev:2013:EBU,
  author =       "Daniar Achakeev and Bernhard Seeger",
  title =        "Efficient bulk updates on multiversion {B}-trees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1834--1845",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Partial persistent index structures support efficient
                 access to current and past versions of objects, while
                 updates are allowed on the current version. The
                 Multiversion B-Tree (MVBT) represents a partially
                 persistent index-structure with both, asymptotic
                 worst-case performance and excellent performance in
                 real life applications. Updates are performed
                 tuple-by-tuple with the same asymptotic performance as
                 for standard B+trees. To the best of our knowledge,
                 there is no efficient algorithm for bulk loading and
                 bulk update of MVBT and other partially persistent
                 index structures. In this paper, we propose the first
                 loading algorithm for MVBT that meets the lower-bound
                 of external sorting. In addition, our approach is also
                 applicable to bulk updates. This is achieved by
                 combining two basic technologies, weight balancing and
                 buffer tree. Our extensive set of experiments confirm
                 the theoretical findings: Our loading algorithm runs
                 considerably faster than performing updates
                 tuple-by-tuple.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Query-driven entity resolution (QDA): performs only the cleaning steps
%%% needed to answer a given selection query correctly.
@Article{Altwaijry:2013:QDA,
  author =       "Hotham Altwaijry and Dmitri V. Kalashnikov and Sharad
                 Mehrotra",
  title =        "Query-driven approach to entity resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1846--1857",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper explores ``on-the-fly'' data cleaning in
                 the context of a user query. A novel Query-Driven
                 Approach (QDA) is developed that performs a minimal
                 number of cleaning steps that are only necessary to
                 answer a given selection query correctly. The
                 comprehensive empirical evaluation of the proposed
                 approach demonstrates its significant advantage in
                 terms of efficiency over traditional techniques for
                 query-driven applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Order dependencies (ODs) and unidirectional ODs (UODs): class
%%% hierarchy, co-NP-completeness of inference, sound and complete
%%% inference procedures.
@Article{Szlichta:2013:ECO,
  author =       "Jaros{\l}aw Szlichta and Parke Godfrey and Jarek Gryz
                 and Calisto Zuzarte",
  title =        "Expressiveness and complexity of order dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1858--1869",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Dependencies play an important role in databases. We
                 study order dependencies (ODs)--and unidirectional
                 order dependencies (UODs), a proper sub-class of
                 ODs--which describe the relationships among
                 lexicographical orderings of sets of tuples. We
                 consider lexicographical ordering, as by the order-by
                 operator in SQL, because this is the notion of order
                 used in SQL and within query optimization. Our main
                 goal is to investigate the inference problem for ODs,
                 both in theory and in practice. We show the usefulness
                 of ODs in query optimization. We establish the
                 following theoretical results: (i) a hierarchy of order
                 dependency classes; (ii) a proof of co-NP-completeness
                 of the inference problem for the subclass of UODs (and
                 ODs); (iii) a proof of co-NP-completeness of the
                 inference problem of functional dependencies (FDs) from
                 ODs in general, but demonstrate linear time complexity
                 for the inference of FDs from UODs; (iv) a sound and
                 complete elimination procedure for inference over ODs;
                 and (v) a sound and complete polynomial inference
                 algorithm for sets of UODs over restricted domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Space-efficient streaming algorithm for counting and sampling
%%% triangles (and constant-sized cliques) in massive graph streams.
@Article{Pavan:2013:CST,
  author =       "A. Pavan and Kanat Tangwongsan and Srikanta Tirthapura
                 and Kun-Lung Wu",
  title =        "Counting and sampling triangles from a graph stream",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1870--1881",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents a new space-efficient algorithm
                 for counting and sampling triangles--and more
                 generally, constant-sized cliques--in a massive graph
                 whose edges arrive as a stream. Compared to prior work,
                 our algorithm yields significant improvements in the
                 space and time complexity for these fundamental
                 problems. Our algorithm is simple to implement and has
                 very good practical performance on large graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Experimental comparison of ten main-memory iterated spatial join
%%% algorithms for distance (range) queries over moving objects.
@Article{Sowell:2013:EAI,
  author =       "Benjamin Sowell and Marcos Vaz Salles and Tuan Cao and
                 Alan Demers and Johannes Gehrke",
  title =        "An experimental analysis of iterated spatial joins in
                 main memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1882--1893",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many modern applications rely on high-performance
                 processing of spatial data. Examples include
                 location-based services, games, virtual worlds, and
                 scientific simulations such as molecular dynamics and
                 behavioral simulations. These applications deal with
                 large numbers of moving objects that continuously sense
                 their environment, and their data access can often be
                 abstracted as a repeated spatial join. Updates to
                 object positions are interspersed with these join
                 operations, and batched for performance. Even for the
                 most demanding scenarios, the data involved in these
                 joins fits comfortably in the main memory of a cluster
                 of machines, and most applications run completely in
                 main memory for performance reasons. Choosing
                 appropriate spatial join algorithms is challenging due
                 to the large number of techniques in the literature. In
                 this paper, we perform an extensive evaluation of
                 repeated spatial join algorithms for distance (range)
                 queries in main memory. Our study is unique in breadth
                 when compared to previous work: We implement, tune, and
                 compare ten distinct algorithms on several workloads
                 drawn from the simulation and spatial indexing
                 literature. We explore the design space of both index
                 nested loops algorithms and specialized join
                 algorithms, as well as the use of moving object indices
                 that can be incrementally maintained. Surprisingly, we
                 find that when queries and updates can be batched,
                 repeatedly re-computing the join result from scratch
                 outperforms using a moving object index in all but the
                 most extreme cases. This suggests that--given the code
                 complexity of index structures for moving objects ---
                 specialized join strategies over simple index
                 structures, such as Synchronous Traversal over R-Trees,
                 should be the methods of choice for the above
                 applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Semantic hash partitioning (the Shape system) for scaling queries over
%%% big RDF graphs on clusters; locality-optimized query plans.
@Article{Lee:2013:SQB,
  author =       "Kisung Lee and Ling Liu",
  title =        "Scaling queries over big {RDF} graphs with semantic
                 hash partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1894--1905",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Massive volumes of big RDF data are growing beyond the
                 performance capacity of conventional RDF data
                 management systems operating on a single node.
                 Applications using large RDF data demand efficient data
                 partitioning solutions for supporting RDF data access
                 on a cluster of compute nodes. In this paper we present
                 a novel semantic hash partitioning approach and
                 implement a Semantic HAsh Partitioning-Enabled
                 distributed RDF data management system, called Shape.
                 This paper makes three original contributions. First,
                 the semantic hash partitioning approach we propose
                 extends the simple hash partitioning method through
                 direction-based triple groups and direction-based
                 triple replications. The latter enhances the former by
                 controlled data replication through intelligent
                 utilization of data access locality, such that queries
                 over big RDF graphs can be processed with zero or very
                 small amount of inter-machine communication cost.
                 Second, we generate locality-optimized query execution
                 plans that are more efficient than popular multi-node
                 RDF data management systems by effectively minimizing
                 the inter-machine communication cost for query
                 processing. Third but not the least, we provide a suite
                 of locality-aware optimization techniques to further
                 reduce the partition size and cut down on the
                 inter-machine communication cost during distributed
                 query processing. Experimental results show that our
                 system scales well and can process big RDF datasets
                 more efficiently than existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Seo:2013:DSD,
  author =       "Jiwon Seo and Jongsoo Park and Jaeho Shin and Monica
                 S. Lam",
  title =        "Distributed {SociaLite}: a {Datalog}-based language
                 for large-scale graph analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1906--1917",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale graph analysis is becoming important with
                 the rise of world-wide social network services.
                 Recently in SociaLite, we proposed extensions to
                 Datalog to efficiently and succinctly implement graph
                 analysis programs on sequential machines. This paper
                 describes novel extensions and optimizations of
                 SociaLite for parallel and distributed executions to
                 support large-scale graph analysis. With distributed
                 SociaLite, programmers simply annotate how data are to
                 be distributed, then the necessary communication is
                 automatically inferred to generate parallel code for
                 cluster of multi-core machines. It optimizes the
                 evaluation of recursive monotone aggregate functions
                 using a delta stepping technique. In addition,
                 approximate computation is supported in SociaLite,
                 allowing programmers to trade off accuracy for less
                 time and space. We evaluated SociaLite with six core
                 graph algorithms used in many social network analyses.
                 Our experiment with 64 Amazon EC2 8-core instances
                 shows that SociaLite programs performed within a factor
                 of two with respect to ideal weak scaling. Compared to
                 optimized Giraph, an open-source alternative of Pregel,
                 SociaLite programs are 4 to 12 times faster across
                 benchmark algorithms, and 22 times more succinct on
                 average. As a declarative query language, SociaLite,
                 with the help of a compiler that generates efficient
                 parallel and approximate code, can be used easily to
                 create many social apps that operate on large-scale
                 distributed graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sarwat:2013:HDS,
  author =       "Mohamed Sarwat and Sameh Elnikety and Yuxiong He and
                 Mohamed F. Mokbel",
  title =        "{Horton+}: a distributed system for processing
                 declarative reachability queries over partitioned
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1918--1929",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Horton+ is a graph query processing system that
                 executes declarative reachability queries on a
                 partitioned attributed multi-graph. It employs a query
                 language, query optimizer, and a distributed execution
                 engine. The query language expresses declarative
                 reachability queries, and supports closures and
                 predicates on node and edge attributes to match graph
                 paths. We introduce three algebraic operators, select,
                 traverse, and join, and a query is compiled into an
                 execution plan containing these operators. As
                 reachability queries access the graph elements in a
                 random access pattern, the graph is therefore
                 maintained in the main memory of a cluster of servers
                 to reduce query execution time. We develop a
                 distributed execution engine that processes a query
                 plan in parallel on the graph servers. Since the query
                 language is declarative, we build a query optimizer
                 that uses graph statistics to estimate predicate
                 selectivity. We experimentally evaluate the system
                 performance on a cluster of 16 graph servers using
                 synthetic graphs as well as a real graph from an
                 application that uses reachability queries. The
                 evaluation shows (1) the efficiency of the optimizer in
                 reducing query execution time, (2) system scalability
                 with the size of the graph and with the number of
                 servers, and (3) the convenience of using declarative
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sundaram:2013:SSS,
  author =       "Narayanan Sundaram and Aizana Turmukhametova and
                 Nadathur Satish and Todd Mostak and Piotr Indyk and
                 Samuel Madden and Pradeep Dubey",
  title =        "Streaming similarity search over one billion tweets
                 using parallel locality-sensitive hashing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1930--1941",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding nearest neighbors has become an important
                 operation on databases, with applications to text
                 search, multimedia indexing, and many other areas. One
                 popular algorithm for similarity search, especially for
                 high dimensional data (where spatial indexes like
                 kd-trees do not perform well) is Locality Sensitive
                 Hashing (LSH), an approximation algorithm for finding
                 similar objects. In this paper, we describe a new
                 variant of LSH, called Parallel LSH (PLSH) designed to
                 be extremely efficient, capable of scaling out on
                 multiple nodes and multiple cores, and which supports
                 high-throughput streaming of new data. Our approach
                 employs several novel ideas, including: cache-conscious
                 hash table layout, using a 2-level merge algorithm for
                 hash table construction; an efficient algorithm for
                 duplicate elimination during hash-table querying; an
                 insert-optimized hash table structure and efficient
                 data expiration algorithm for streaming data; and a
                 performance model that accurately estimates performance
                 of the algorithm and can be used to optimize parameter
                 settings. We show that on a workload where we perform
                 similarity search on a dataset of $ > 1 $ Billion tweets,
                 with hundreds of millions of new tweets per day, we can
                 achieve query times of 1--2.5 ms. We show that this is
                 an order of magnitude faster than existing indexing
                 schemes, such as inverted indexes. To the best of our
                 knowledge, this is the fastest implementation of LSH,
                 with table construction times up to $ 3.7 \times $
                 faster and query times that are $ 8.3 \times $ faster
                 than a basic implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{DeBrabant:2013:ACN,
  author =       "Justin DeBrabant and Andrew Pavlo and Stephen Tu and
                 Michael Stonebraker and Stan Zdonik",
  title =        "Anti-caching: a new approach to database management
                 system architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1942--1953",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The traditional wisdom for building disk-based
                 relational database management systems (DBMS) is to
                 organize data in heavily-encoded blocks stored on disk,
                 with a main memory block cache. In order to improve
                 performance given high disk latency, these systems use
                 a multi-threaded architecture with dynamic record-level
                 locking that allows multiple transactions to access the
                 database at the same time. Previous research has shown
                 that this results in substantial overhead for on-line
                 transaction processing (OLTP) applications [15]. The
                 next generation DBMSs seek to overcome these
                 limitations with architecture based on main memory
                 resident data. To overcome the restriction that all
                 data fit in main memory, we propose a new technique,
                 called anti-caching, where cold data is moved to disk
                 in a transactionally-safe manner as the database grows
                 in size. Because data initially resides in memory, an
                 anti-caching architecture reverses the traditional
                 storage hierarchy of disk-based systems. Main memory is
                 now the primary storage device. We implemented a
                 prototype of our anti-caching proposal in a
                 high-performance, main memory OLTP DBMS and performed a
                 series of experiments across a range of database sizes,
                 workload skews, and read/write mixes. We compared its
                 performance with an open-source, disk-based DBMS
                 optionally fronted by a distributed main memory cache.
                 Our results show that for higher skewed workloads the
                 anti-caching architecture has a performance advantage
                 over either of the other architectures tested of up to
                 $ 9 \times $ for a data size $ 8 \times $ larger than
                 memory.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qardaji:2013:UHM,
  author =       "Wahbeh Qardaji and Weining Yang and Ninghui Li",
  title =        "Understanding hierarchical methods for differentially
                 private histograms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1954--1965",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, many approaches to differentially
                 privately publish histograms have been proposed.
                 Several approaches rely on constructing tree structures
                 in order to decrease the error when answering large range
                 queries. In this paper, we examine the factors
                 affecting the accuracy of hierarchical approaches by
                 studying the mean squared error (MSE) when answering
                 range queries. We start with one-dimensional
                 histograms, and analyze how the MSE changes with
                 different branching factors, after employing
                 constrained inference, and with different methods to
                 allocate the privacy budget among hierarchy levels. Our
                 analysis and experimental results show that combining
                 the choice of a good branching factor with constrained
                 inference outperform the current state of the art.
                 Finally, we extend our analysis to multi-dimensional
                 histograms. We show that the benefits from employing
                 hierarchical methods beyond a single dimension are
                 significantly diminished, and when there are 3 or more
                 dimensions, it is almost always better to use the Flat
                 method instead of a hierarchy.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2013:TSD,
  author =       "Rui Li and Shengjie Wang and Kevin Chen-Chuan Chang",
  title =        "Towards social data platform: automatic topic-focused
                 monitor for {Twitter} stream",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1966--1977",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many novel applications have been built based on
                 analyzing tweets about specific topics. While these
                 applications provide different kinds of analysis, they
                 share a common task of monitoring ``target'' tweets
                 from the Twitter stream for a topic. The current
                 solution for this task tracks a set of manually
                 selected keywords with Twitter APIs. Obviously, this
                 manual approach has many limitations. In this paper, we
                 propose a data platform to automatically monitor target
                 tweets from the Twitter stream for any given topic. To
                 monitor target tweets in an optimal and continuous way,
                 we design Automatic Topic-focused Monitor (ATM), which
                 iteratively (1) samples tweets from the stream and (2)
                 selects keywords to track based on the samples. To
                 realize ATM, we develop a tweet sampling algorithm to
                 sample sufficient unbiased tweets with available
                 Twitter APIs, and a keyword selection algorithm to
                 efficiently select keywords that have a near-optimal
                 coverage of target tweets under cost constraints. We
                 conduct extensive experiments to show the effectiveness
                 of ATM. E.g., ATM covers 90\% of target tweets for a
                 topic and improves the manual approach by 49\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jin:2013:SFS,
  author =       "Ruoming Jin and Guan Wang",
  title =        "Simple, fast, and scalable reachability oracle",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1978--1989",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A reachability oracle (or hop labeling) assigns each
                 vertex $v$ two sets of vertices: $ {\rm Lout}(v) $ and
                 $ {\rm Lin}(v) $, such that $u$ reaches $v$ iff $ {\rm
                 Lout}(u) \cap {\rm Lin}(v) \neq \emptyset $. Despite their
                 simplicity and elegance, reachability oracles have
                 failed to achieve efficiency in more than ten years
                 since their introduction: The main problem is high
                 construction cost, which stems from a set-cover
                 framework and the need to materialize transitive
                 closure. In this paper, we present two simple and
                 efficient labeling algorithms, Hierarchical-Labeling
                 and Distribution-Labeling, which can work on massive
                 real-world graphs: Their construction time is an order
                 of magnitude faster than the set-cover based labeling
                 approach, and transitive closure materialization is not
                 needed. On large graphs, their index sizes and their
                 query performance can now beat the state-of-the-art
                 transitive closure compression and online search
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bakibayev:2013:AOF,
  author =       "Nurzhan Bakibayev and Tom{\'a}s Kocisk{\'y} and Dan
                 Olteanu and Jakub Z{\'a}vodn{\'y}",
  title =        "Aggregation and ordering in factorised databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "1990--2001",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A common approach to data analysis involves
                 understanding and manipulating succinct representations
                 of data. In earlier work, we put forward a succinct
                 representation system for relational data called
                 factorised databases and reported on the main-memory
                 query engine FDB for select-project-join queries on
                 such databases. In this paper, we extend FDB to support
                 a larger class of practical queries with aggregates and
                 ordering. This requires novel optimisation and
                 evaluation techniques. We show how factorisation
                 coupled with partial aggregation can effectively reduce
                 the number of operations needed for query evaluation.
                 We also show how factorisations of query results can
                 support enumeration of tuples in desired orders as
                 efficiently as listing them from the unfactorised,
                 sorted results. We experimentally observe that FDB can
                 outperform off-the-shelf relational engines by orders
                 of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Park:2013:PCS,
  author =       "Yoonjae Park and Jun-Ki Min and Kyuseok Shim",
  title =        "Parallel computation of skyline and reverse skyline
                 queries using {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "2002--2013",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The skyline operator and its variants such as dynamic
                 skyline and reverse skyline operators have attracted
                 considerable attention recently due to their broad
                 applications. However, computations of such operators
                 are challenging today since there is an increasing
                 trend of applications to deal with big data. For such
                 data-intensive applications, the MapReduce framework
                 has been widely used recently. In this paper, we
                 propose efficient parallel algorithms for processing
                 the skyline and its variants using MapReduce. We first
                 build histograms to effectively prune out nonskyline
                 (non-reverse skyline) points in advance. We next
                 partition data based on the regions divided by the
                 histograms and compute candidate (reverse) skyline
                 points for each region independently using MapReduce.
                 Finally, we check whether each candidate point is
                 actually a (reverse) skyline point in every region
                 independently. Our performance study confirms the
                 effectiveness and scalability of the proposed
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xie:2013:FIG,
  author =       "Wenlei Xie and Guozhang Wang and David Bindel and Alan
                 Demers and Johannes Gehrke",
  title =        "Fast iterative graph computation with block updates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "14",
  pages =        "2014--2025",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Dec 13 05:57:13 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Scaling iterative graph processing applications to
                 large graphs is an important problem. Performance is
                 critical, as data scientists need to execute graph
                 programs many times with varying parameters. The need
                 for a high-level, high-performance programming model
                 has inspired much research on graph programming
                 frameworks. In this paper, we show that the important
                 class of computationally light graph applications ---
                 applications that perform little computation per vertex
                 --- has severe scalability problems across multiple
                 cores as these applications hit an early ``memory
                 wall'' that limits their speedup. We propose a novel
                 block-oriented computation model, in which computation
                 is iterated locally over blocks of highly connected
                 nodes, significantly improving the amount of
                 computation per cache miss. Following this model, we
                 describe the design and implementation of a block-aware
                 graph processing runtime that keeps the familiar
                 vertex-centric programming paradigm while reaping the
                 benefits of block-oriented execution. Our experiments
                 show that block-oriented execution significantly
                 improves the performance of our framework for several
                 graph applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2013:EEK,
  author =       "Xiaoli Wang and Xiaofeng Ding and Anthony K. H. Tung
                 and Zhenjie Zhang",
  title =        "Efficient and effective {KNN} sequence search with
                 approximate $n$-grams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "1--12",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we address the problem of finding
                 $k$-nearest neighbors (KNN) in sequence databases using
                 the edit distance. Unlike most existing works using
                 short and exact $n$-gram matchings together with a
                 filter-and-refine framework for KNN sequence search,
                 our new approach allows us to use longer but
                 approximate $n$-gram matchings as a basis of KNN
                 candidates pruning. Based on this new idea, we devise a
                 pipeline framework over a two-level index for searching
                 KNN in the sequence database. By coupling this
                 framework together with several efficient filtering
                 strategies, i.e. the frequency queue and the well-known
                 Combined Algorithm (CA), our proposal brings various
                 enticing advantages over existing works, including (1)
                 huge reduction on false positive candidates to avoid
                 large overheads on candidate verifications; (2)
                 progressive result update and early termination; and
                 (3) good extensibility to parallel computation. We
                 conduct extensive experiments on three real datasets to
                 verify the superiority of the proposed framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yu:2013:MSE,
  author =       "Weiren Yu and Xuemin Lin and Wenjie Zhang and Lijun
                 Chang and Jian Pei",
  title =        "More is simpler: effectively and efficiently assessing
                 node-pair similarities based on hyperlinks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "13--24",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Similarity assessment is one of the core tasks in
                 hyperlink analysis. Recently, with the proliferation of
                 applications, e.g., web search and collaborative
                 filtering, SimRank has been a well-studied measure of
                 similarity between two nodes in a graph. It recursively
                 follows the philosophy that ``two nodes are similar if
                 they are referenced (have incoming edges) from similar
                 nodes'', which can be viewed as an aggregation of
                 similarities based on incoming paths. Despite its
                 popularity, SimRank has an undesirable property, i.e.,
                 ``zero-similarity'': It only accommodates paths with
                 equal length from a common ``center'' node. Thus, a
                 large portion of other paths are fully ignored. This
                 paper attempts to remedy this issue. (1) We propose and
                 rigorously justify SimRank*, a revised version of
                 SimRank, which resolves such counter-intuitive
                 ``zero-similarity'' issues while inheriting merits of
                 the basic SimRank philosophy. (2) We show that the
                 series form of SimRank* can be reduced to a fairly
                 succinct and elegant closed form, which looks even
                 simpler than SimRank, yet enriches semantics without
                 suffering from increased computational cost. This leads
                 to a fixed-point iterative paradigm of SimRank* in $ O
                 (K n m) $ time on a graph of $n$ nodes and $m$ edges
                 for $K$ iterations, which is comparable to SimRank. (3)
                 To further optimize SimRank* computation, we leverage a
                 novel clustering strategy via edge concentration. Due
                 to its NP-hardness, we devise an efficient and
                 effective heuristic to speed up SimRank* computation to
                 $ O(K n \tilde{m}) $ time, where $ \tilde{m} $ is
                 generally much smaller than $m$. (4) Using real and
                 synthetic data, we
                 empirically verify the rich semantics of SimRank*, and
                 demonstrate its high computation efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gyssens:2013:ATS,
  author =       "Marc Gyssens and Jan Paredaens and Dirk {Van Gucht}
                 and Jef Wijsen and Yuqing Wu",
  title =        "An approach towards the study of symmetric queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "25--36",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many data-intensive applications have to query a
                 database that involves sequences of sets of objects. It
                 is not uncommon that the order of the sets in such a
                 sequence does not affect the result of the query. Such
                 queries are called symmetric. In this paper, the
                 authors wish to initiate research on symmetric queries.
                 Thereto, a data model is proposed in which a binary
                 relation between objects and set names encodes set
                 membership. On this data model, two query languages are
                 introduced, QuineCALC and SyCALC. They are correlated
                 in a manner that is made precise with the symmetric
                 Boolean functions of Quine, respectively symmetric
                 relational functions, on sequences of sets of given
                 length. The latter do not only involve the Boolean
                 operations union, intersection, and complement, but
                 also projection and Cartesian product. Quine's
                 characterization of symmetric Boolean functions in
                 terms of incidence information is generalized to
                 QuineCALC queries. In the process, an incidence-based
                 normal form for QuineCALC queries is proposed. Inspired
                 by these desirable incidence-related properties of
                 QuineCALC queries, counting-only queries are introduced
                 as SyCALC queries for which the result only depends on
                 incidence information. Counting-only queries are then
                 characterized as quantified Boolean combinations of
                 QuineCALC queries, and a normal form is proposed for
                 them as well. Finally, it is shown that, while it is
                 undecidable whether a SyCALC query is counting-only, it
                 is decidable whether a counting-only query is a
                 QuineCALC query.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Das:2013:CST,
  author =       "Sudipto Das and Vivek R. Narasayya and Feng Li and
                 Manoj Syamala",
  title =        "{CPU} sharing techniques for performance isolation in
                 multi-tenant relational database-as-a-service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "37--48",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multi-tenancy and resource sharing are essential to
                 make a Database-as-a-Service (DaaS) cost-effective.
                 However, one major consequence of resource sharing is
                 that the performance of one tenant's workload can be
                 significantly affected by the resource demands of
                 co-located tenants. The lack of performance isolation
                 in a shared environment can make DaaS less attractive
                 to performance-sensitive tenants. Our approach to
                 performance isolation in a DaaS is to isolate the key
                 resources needed by the tenants' workload. In this
                 paper, we focus on the problem of effectively sharing
                 and isolating CPU among co-located tenants in a
                 multi-tenant DaaS. We show that traditional CPU sharing
                 abstractions and algorithms are inadequate to support
                 several key new requirements that arise in DaaS: (a)
                 absolute and fine-grained CPU reservations without
                 static allocation; (b) support elasticity by
                 dynamically adapting to bursty resource demands; and
                 (c) enable the DaaS provider to suitably tradeoff
                 revenue with fairness. We implemented these new
                 scheduling algorithms in a commercial DaaS prototype
                 and extensive experiments demonstrate the effectiveness
                 of our techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2013:ATK,
  author =       "Qian Chen and Haibo Hu and Jianliang Xu",
  title =        "Authenticating top-$k$ queries in location-based
                 services with confidentiality",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "49--60",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "State-of-the-art location-based services (LBSs)
                 involve data owners, requesting clients, and service
                 providers. As LBSs become new business opportunities,
                 there is an increasing necessity to verify the
                 genuineness of service results. Unfortunately, while
                 traditional query authentication techniques can address
                 this issue, they fail to protect the confidentiality of
                 data, which is sensitive location information when LBSs
                 are concerned. Recent work has studied how to preserve
                 such location privacy in query authentication. However,
                 the prior work is limited to range queries, where
                 private values only appear on one side of the range
                 comparison. In this paper, we address the more
                 challenging authentication problem on top-$k$ queries,
                 where private values appear on both sides of a
                 comparison. To start with, we propose two novel
                 cryptographic building blocks, followed by a
                 comprehensive design of authentication schemes for
                 top-$k$ queries based on R-tree and Power Diagram
                 indexes. Optimizations, security analysis, and
                 experimental results consistently show the
                 effectiveness and robustness of the proposed schemes
                 under various system settings and query workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qi:2013:TDO,
  author =       "Zichao Qi and Yanghua Xiao and Bin Shao and Haixun
                 Wang",
  title =        "Toward a distance oracle for billion-node graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "61--72",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The emergence of real life graphs with billions of
                 nodes poses significant challenges for managing and
                 querying these graphs. One of the fundamental queries
                 submitted to graphs is the shortest distance query.
                 Online BFS (breadth-first search) and offline
                 pre-computing pairwise shortest distances are
                 prohibitive in time or space complexity for
                 billion-node graphs. In this paper, we study the
                 feasibility of building distance oracles for
                 billion-node graphs. A distance oracle provides
                 approximate answers to shortest distance queries by
                 using a pre-computed data structure for the graph.
                 Sketch-based distance oracles are good candidates
                 because they assign each vertex a sketch of bounded
                 size, which means they have linear space complexity.
                 However, state-of-the-art sketch-based distance oracles
                 lack efficiency or accuracy when dealing with big
                 graphs. In this paper, we address the scalability and
                 accuracy issues by focusing on optimizing the three key
                 factors that affect the performance of distance
                 oracles: landmark selection, distributed BFS, and
                 answer generation. We conduct extensive experiments on
                 both real networks and synthetic networks to show that
                 we can build distance oracles of affordable cost and
                 efficiently answer shortest distance queries even for
                 billion-node graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kaul:2013:FSP,
  author =       "Manohar Kaul and Raymond Chi-Wing Wong and Bin Yang
                 and Christian S. Jensen",
  title =        "Finding shortest paths on terrains by killing two
                 birds with one stone",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "73--84",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increasing availability of terrain data,
                 e.g., from aerial laser scans, the management of such
                 data is attracting increasing attention in both
                 industry and academia. In particular, spatial queries,
                 e.g., $k$-nearest neighbor and reverse nearest neighbor
                 queries, in Euclidean and spatial network spaces are
                 being extended to terrains. Such queries all rely on an
                 important operation, that of finding shortest surface
                 distances. However, shortest surface distance
                 computation is very time consuming. We propose
                 techniques that enable efficient computation of lower
                 and upper bounds of the shortest surface distance,
                 which enable faster query processing by eliminating
                 expensive distance computations. Empirical studies show
                 that our bounds are much tighter than the best-known
                 bounds in many cases and that they enable speedups of
                 up to 43 times for some well-known spatial queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Balkesen:2013:MCM,
  author =       "Cagri Balkesen and Gustavo Alonso and Jens Teubner and
                 M. Tamer {\"O}zsu",
  title =        "Multi-core, main-memory joins: sort vs. hash
                 revisited",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "1",
  pages =        "85--96",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:56 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper we experimentally study the performance
                 of main-memory, parallel, multi-core join algorithms,
                 focusing on sort-merge and (radix-)hash join. The
                 relative performance of these two join approaches has
                 been a topic of discussion for a long time. With the
                 advent of modern multi-core architectures, it has been
                 argued that sort-merge join is now a better choice than
                 radix-hash join. This claim is justified based on the
                 width of SIMD instructions (sort-merge outperforms
                 radix-hash join once SIMD is sufficiently wide), and
                 NUMA awareness (sort-merge is superior to hash join in
                 NUMA architectures). We conduct extensive experiments
                 on the original and optimized versions of these
                 algorithms. The experiments show that, contrary to
                 these claims, radix-hash join is still clearly
                 superior, and sort-merge approaches the performance of
                 radix only when very large amounts of data are
                 involved. The paper also provides the fastest
                 implementations of these algorithms, and covers many
                 aspects of modern hardware architectures relevant not
                 only for joins but for any parallel data processing
                 operator.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schuhknecht:2013:UPD,
  author =       "Felix Martin Schuhknecht and Alekh Jindal and Jens
                 Dittrich",
  title =        "The uncracked pieces in database cracking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "2",
  pages =        "97--108",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:58 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database cracking has been an area of active research
                 in recent years. The core idea of database cracking is
                 to create indexes adaptively and incrementally as a
                 side-product of query processing. Several works have
                 proposed different cracking techniques for different
                 aspects including updates, tuple-reconstruction,
                 convergence, concurrency-control, and robustness.
                 However, there is a lack of any comparative study of
                 these different methods by an independent group. In
                 this paper, we conduct an experimental study on
                 database cracking. Our goal is to critically review
                 several aspects, identify the potential, and propose
                 promising directions in database cracking. With this
                 study, we hope to expand the scope of database cracking
                 and possibly leverage cracking in database engines
                 other than MonetDB. We repeat several prior database
                 cracking works including the core cracking algorithms
                 as well as three other works on convergence (hybrid
                 cracking), tuple-reconstruction (sideways cracking),
                 and robustness (stochastic cracking) respectively. We
                 evaluate these works and show possible directions to do
                 even better. We further test cracking under a variety
                 of experimental settings, including high selectivity
                 queries, low selectivity queries, and multiple query
                 access patterns. Finally, we compare cracking against
                 different sorting algorithms as well as against
                 different main-memory optimised indexes, including the
                 recently proposed Adaptive Radix Tree (ART). Our
                 results show that: (i) the previously proposed cracking
                 algorithms are repeatable, (ii) there is still enough
                 room to significantly improve the previously proposed
                 cracking algorithms, (iii) cracking depends heavily on
                 query selectivity, (iv) cracking needs to catch up with
                 modern indexing trends, and (v) different indexing
                 algorithms have different indexing signatures.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eravci:2013:DBR,
  author =       "Bahaeddin Eravci and Hakan Ferhatosmanoglu",
  title =        "Diversity based relevance feedback for time series
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "2",
  pages =        "109--120",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:58 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose a diversity based relevance feedback
                 approach for time series data to improve the accuracy
                 of search results. We first develop the concept of
                 relevance feedback for time series based on dual-tree
                 complex wavelet (CWT) and SAX based approaches. We aim
                 to enhance the search quality by incorporating
                 diversity in the results presented to the user for
                 feedback. We then propose a method which utilizes the
                 representation type as part of the feedback, as opposed
                 to a human choosing based on a preprocessing or
                 training phase. The proposed methods utilize a
                 weighting to handle the relevance feedback of important
                 properties for both single and multiple representation
                 cases. Our experiments on a large variety of time
                 series data sets show that the proposed diversity based
                 relevance feedback improves the retrieval performance.
                 Results confirm that representation feedback
                 incorporates item diversity implicitly and achieves
                 good performance even when using simple nearest
                 neighbor as the retrieval method. To the best of our
                 knowledge, this is the first study on diversification
                 of time series search to improve retrieval accuracy and
                 representation feedback.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pelley:2013:SMN,
  author =       "Steven Pelley and Thomas F. Wenisch and Brian T. Gold
                 and Bill Bridge",
  title =        "Storage management in the {NVRAM} era",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "2",
  pages =        "121--132",
  month =        oct,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:21:58 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Emerging nonvolatile memory technologies (NVRAM) offer
                 an alternative to disk that is persistent, provides
                 read latency similar to DRAM, and is byte-addressable.
                 Such NVRAMs could revolutionize online transaction
                 processing (OLTP), which today must employ
                 sophisticated optimizations with substantial software
                 overheads to overcome the long latency and poor random
                 access performance of disk. Nevertheless, many
                 candidate NVRAM technologies exhibit their own
                 limitations, such as greater-than-DRAM latency,
                 particularly for writes. In this paper, we reconsider
                 OLTP durability management to optimize recovery
                 performance and forward-processing throughput for
                 emerging NVRAMs. First, we demonstrate that using NVRAM
                 as a drop-in replacement for disk allows
                 near-instantaneous recovery, but software complexity
                 necessary for disk (i.e., Write Ahead Logging/ARIES)
                 limits transaction throughput. Next, we consider the
                 possibility of removing software-managed DRAM
                 buffering. Finally, we measure the cost of ordering
                 writes to NVRAM, which is vital for correct recovery.
                 We consider three recovery mechanisms: NVRAM
                 Disk-Replacement, In-Place Updates (transactions
                 persist data in-place), and NVRAM Group Commit
                 (transactions commit/persist atomically in batches).
                 Whereas In-Place Updates offers the simplest design, it
                 introduces persist synchronizations at every page
                 update. NVRAM Group Commit minimizes persist
                 synchronization, offering up to a 50\% throughput
                 improvement for large synchronous persist latencies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Salloum:2013:OOO,
  author =       "Mariam Salloum and Xin Luna Dong and Divesh Srivastava
                 and Vassilis J. Tsotras",
  title =        "Online ordering of overlapping data sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "3",
  pages =        "133--144",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:00 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data integration systems offer a uniform interface for
                 querying a large number of autonomous and heterogeneous
                 data sources. Ideally, answers are returned as sources
                 are queried and the answer list is updated as more
                 answers arrive. Choosing a good ordering in which the
                 sources are queried is critical for increasing the rate
                 at which answers are returned. However, this problem is
                 challenging since we often do not have complete or
                 precise statistics of the sources, such as their
                 coverage and overlap. It is further exacerbated in the
                 Big Data era, which is witnessing two trends in
                 Deep-Web data: first, obtaining a full coverage of data
                 in a particular domain often requires extracting data
                 from thousands of sources; second, there is often a big
                 variation in overlap between different data sources. In
                 this paper we present OASIS, an {Online} query
                 {Answering} {System} for {overlappIng} {Sources}. OASIS
                 has three key components for source ordering. First,
                 the Overlap Estimation component estimates overlaps
                 between sources according to available statistics under
                 the Maximum Entropy principle. Second, the Source
                 Ordering component orders the sources according to the
                 new contribution they are expected to provide, and
                 adjusts the ordering based on statistics collected
                 during query answering. Third, the Statistics
                 Enrichment component selects critical missing
                 statistics to enrich at runtime. Experimental results
                 on both real and synthetic data show high efficiency
                 and scalability of our algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2013:MQO,
  author =       "Guoping Wang and Chee-Yong Chan",
  title =        "Multi-query optimization in {MapReduce} framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "3",
  pages =        "145--156",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:00 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MapReduce has recently emerged as a new paradigm for
                 large-scale data analysis due to its high scalability,
                 fine-grained fault tolerance and easy programming
                 model. Since different jobs often share similar work
                 (e.g., several jobs scan the same input file or produce
                 the same map output), there are many opportunities to
                 optimize the performance for a batch of jobs. In this
                 paper, we propose two new techniques for multi-job
                 optimization in the MapReduce framework. The first is a
                 generalized grouping technique (which generalizes the
                 recently proposed MRShare technique) that merges
                 multiple jobs into a single job thereby enabling the
                 merged jobs to share both the scan of the input file as
                 well as the communication of the common map output. The
                 second is a materialization technique that enables
                 multiple jobs to share both the scan of the input file
                 as well as the communication of the common map output
                 via partial materialization of the map output of some
                 jobs (in the map and/or reduce phase). Our second
                 contribution is the proposal of a new optimization
                 algorithm that given an input batch of jobs, produces
                 an optimal plan by a judicious partitioning of the jobs
                 into groups and an optimal assignment of the processing
                 technique to each group. Our experimental results on
                 Hadoop demonstrate that our new approach significantly
                 outperforms the state-of-the-art technique, MRShare, by
                 up to 107\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2013:AAD,
  author =       "Zhenhui Li and Bolin Ding and Fei Wu and Tobias Kin
                 Hou Lei and Roland Kays and Margaret C. Crofoot",
  title =        "Attraction and avoidance detection from movements",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "3",
  pages =        "157--168",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:00 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the development of positioning technology,
                 movement data has become widely available nowadays. An
                 important task in movement data analysis is to mine the
                 relationships among moving objects based on their
                 spatiotemporal interactions. Among all relationship
                 types, attraction and avoidance are arguably the most
                 natural ones. However, rather surprisingly, there is no
                 existing method that addresses the problem of mining
                 significant attraction and avoidance relationships in a
                 well-defined and unified framework. In this paper, we
                 propose a novel method to measure the significance
                 value of relationship between any two objects by
                 examining the background model of their movements via
                 a permutation test. Since the permutation test is
                 computationally expensive, two effective pruning
                 strategies are developed to reduce the computation
                 time. Furthermore, we show how the proposed method can
                 be extended to efficiently answer the classic threshold
                 query: given an object, retrieve all the objects in the
                 database that have relationships, whose significance
                 values are above certain threshold, with the query
                 object. Empirical studies on both synthetic data and
                 real movement data demonstrate the effectiveness and
                 efficiency of our method.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhao:2013:PBA,
  author =       "Xiang Zhao and Chuan Xiao and Xuemin Lin and Qing Liu
                 and Wenjie Zhang",
  title =        "A partition-based approach to structure similarity
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "3",
  pages =        "169--180",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:00 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs are widely used to model complex data in many
                 applications, such as bioinformatics, chemistry, social
                 networks, pattern recognition, etc. A fundamental and
                 critical query primitive is to efficiently search
                 similar structures in a large collection of graphs.
                 This paper studies the graph similarity queries with
                 edit distance constraints. Existing solutions to the
                 problem utilize fixed-size overlapping substructures to
                 generate candidates, and thus become susceptible to
                 large vertex degrees or large distance thresholds. In
                 this paper, we present a partition-based approach to
                 tackle the problem. By dividing data graphs into
                 variable-size non-overlapping partitions, the edit
                 distance constraint is converted to a graph containment
                 constraint for candidate generation. We develop
                 efficient query processing algorithms based on the new
                 paradigm. A candidate pruning technique and an improved
                 graph edit distance algorithm are also developed to
                 further boost the performance. In addition, a
                 cost-aware graph partitioning technique is devised to
                 optimize the index. Extensive experiments demonstrate
                 our approach significantly outperforms existing
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bailis:2013:HAT,
  author =       "Peter Bailis and Aaron Davidson and Alan Fekete and
                 Ali Ghodsi and Joseph M. Hellerstein and Ion Stoica",
  title =        "Highly available transactions: virtues and
                 limitations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "3",
  pages =        "181--192",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:00 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To minimize network latency and remain online during
                 server failures and network partitions, many modern
                 distributed data storage systems eschew transactional
                 functionality, which provides strong semantic
                 guarantees for groups of multiple operations over
                 multiple data items. In this work, we consider the
                 problem of providing Highly Available Transactions
                 (HATs): transactional guarantees that do not suffer
                 unavailability during system partitions or incur high
                 network latency. We introduce a taxonomy of highly
                 available systems and analyze existing ACID isolation
                 and distributed data consistency guarantees to identify
                 which can and cannot be achieved in HAT systems. This
                 unifies the literature on weak transactional isolation,
                 replica consistency, and highly available systems. We
                 analytically and experimentally quantify the
                 availability and performance benefits of HATs --- often
                 two to three orders of magnitude over wide-area
                 networks --- and discuss their necessary semantic
                 compromises.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Tian:2013:TLV,
  author =       {Yuanyuan Tian and Andrey Balmin and Severin Andreas
                  Corsten and Shirish Tatikonda and John McPherson},
  title =        {From {``think like a vertex''} to {``think like a
                  graph''}},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {3},
  pages =        {193--204},
  month =        nov,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:00 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {To meet the challenge of processing rapidly growing
                  graph and network data created by modern applications,
                  a number of distributed graph processing systems have
                  emerged, such as Pregel and GraphLab. All these systems
                  divide input graphs into partitions, and employ a
                  ``think like a vertex'' programming model to support
                  iterative graph computation. This vertex-centric model
                  is easy to program and has been proved useful for many
                  graph algorithms. However, this model hides the
                  partitioning information from the users, thus prevents
                  many algorithm-specific optimizations. This often
                  results in longer execution time due to excessive
                  network messages (e.g. in Pregel) or heavy scheduling
                  overhead to ensure data consistency (e.g. in GraphLab).
                  To address this limitation, we propose a new ``think
                  like a graph'' programming paradigm. Under this
                  graph-centric model, the partition structure is opened
                  up to the users, and can be utilized so that
                  communication within a partition can bypass the heavy
                  message passing or scheduling machinery. We implemented
                  this model in a new system, called Giraph++, based on
                  Apache Giraph, an open source implementation of Pregel.
                  We explore the applicability of the graph-centric model
                  to three categories of graph algorithms, and
                  demonstrate its flexibility and superior performance,
                  especially on well-partitioned data. For example, on a
                  web graph with 118 million vertices and 855 million
                  edges, the graph-centric version of connected component
                  detection algorithm runs 63X faster and uses 204X fewer
                  network messages than its vertex-centric counterpart.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Niedermayer:2013:PNN,
  author =       {Johannes Niedermayer and Andreas Z{\"u}fle and Tobias
                  Emrich and Matthias Renz and Nikos Mamoulis and Lei
                  Chen and Hans-Peter Kriegel},
  title =        {Probabilistic nearest neighbor queries on uncertain
                  moving object trajectories},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {3},
  pages =        {205--216},
  month =        nov,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:00 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Nearest neighbor (NN) queries in trajectory databases
                  have received significant attention in the past, due to
                  their applications in spatio-temporal data analysis.
                  More recent work has considered the realistic case
                  where the trajectories are uncertain; however, only
                  simple uncertainty models have been proposed, which do
                  not allow for accurate probabilistic search. In this
                  paper, we fill this gap by addressing probabilistic
                  nearest neighbor queries in databases with uncertain
                  trajectories modeled by stochastic processes,
                  specifically the Markov chain model. We study three
                  nearest neighbor query semantics that take as input a
                  query state or trajectory $q$ and a time interval, and
                  theoretically evaluate their runtime complexity.
                  Furthermore we propose a sampling approach which uses
                  Bayesian inference to guarantee that sampled
                  trajectories conform to the observation data stored in
                  the database. This sampling approach can be used in
                  Monte-Carlo based approximation solutions. We include
                  an extensive experimental study to support our
                  theoretical results.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Karanasos:2013:DSD,
  author =       {Konstantinos Karanasos and Asterios Katsifodimos and
                  Ioana Manolescu},
  title =        {{Delta}: scalable data dissemination under capacity
                  constraints},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {217--228},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {In content-based publish-subscribe (pub/sub) systems,
                  users express their interests as queries over a stream
                  of publications. Scaling up content-based pub/sub to
                  very large numbers of subscriptions is challenging:
                  users are interested in low latency, that is, getting
                  subscription results fast, while the pub/sub system
                  provider is mostly interested in scaling, i.e., being
                  able to serve large numbers of subscribers, with low
                  computational resources utilization. We present a novel
                  approach for scalable content-based pub/sub in the
                  presence of constraints on the available CPU and
                  network resources, implemented within our pub/sub
                  system Delta. We achieve scalability by off-loading
                  some subscriptions from the pub/sub server, and
                  leveraging view-based query rewriting to feed these
                  subscriptions from the data accumulated in others. Our
                  main contribution is a novel algorithm for organizing
                  views in a multi-level dissemination network,
                  exploiting view-based rewriting and powerful linear
                  programming capabilities to scale to many views,
                  respect capacity constraints, and minimize latency. The
                  efficiency and effectiveness of our algorithm are
                  confirmed through extensive experiments and a large
                  deployment in a WAN.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Budak:2013:GOD,
  author =       {Ceren Budak and Theodore Georgiou and Divyakant
                  Agrawal and Amr {El Abbadi}},
  title =        {{GeoScope}: online detection of geo-correlated
                  information trends in social networks},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {229--240},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {The First Law of Geography states ``Everything is
                  related to everything else, but near things are more
                  related than distant things''. This spatial
                  significance has implications in various applications,
                  trend detection being one of them. In this paper we
                  propose a new algorithmic tool, GeoScope, to detect
                  geo-trends. GeoScope is a data streams solution that
                  detects correlations between topics and locations in a
                  sliding window, in addition to analyzing topics and
                  locations independently. GeoScope offers theoretical
                  guarantees for detecting all trending correlated pairs
                  while requiring only sub-linear space and running time.
                  We perform various human validation tasks to
                  demonstrate the value of GeoScope. The results show
                  that human judges prefer GeoScope to the best
                  performing baseline solution 4:1 in terms of the
                  geographical significance of the presented information.
                  As the Twitter analysis demonstrates, GeoScope
                  successfully filters out topics without geo-intent and
                  detects various local interests such as emergency
                  events, political demonstrations or cultural events.
                  Experiments on Twitter show that GeoScope has perfect
                  recall and near-perfect precision.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Onizuka:2013:OIQ,
  author =       {Makoto Onizuka and Hiroyuki Kato and Soichiro Hidaka
                  and Keisuke Nakano and Zhenjiang Hu},
  title =        {Optimization for iterative queries on {MapReduce}},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {241--252},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We propose OptIQ, a query optimization approach for
                  iterative queries in distributed environment. OptIQ
                  removes redundant computations among different
                  iterations by extending the traditional techniques of
                  view materialization and incremental view evaluation.
                  First, OptIQ decomposes iterative queries into
                  invariant and variant views, and materializes the
                  former view. Redundant computations are removed by
                  reusing the materialized view among iterations. Second,
                  OptIQ incrementally evaluates the variant view, so that
                  redundant computations are removed by skipping the
                  evaluation on converged tuples in the variant view. We
                  verify the effectiveness of OptIQ through the queries
                  of PageRank and $k$-means clustering on real datasets.
                  The results show that OptIQ achieves high efficiency,
                  up to five times faster than is possible without
                  removing the redundant computations among iterations.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Shuai:2013:WOS,
  author =       {Hong-Han Shuai and De-Nian Yang and Philip S. Yu and
                  Ming-Syan Chen},
  title =        {Willingness optimization for social group activity},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {253--264},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Studies show that a person is willing to join a social
                  group activity if the activity is interesting, and if
                  some close friends also join the activity as
                  companions. The literature has demonstrated that the
                  interests of a person and the social tightness among
                  friends can be effectively derived and mined from
                  social networking websites. However, even with the
                  above two kinds of information widely available, social
                  group activities still need to be coordinated manually,
                  and the process is tedious and time-consuming for
                  users, especially for a large social group activity,
                  due to complications of social connectivity and the
                  diversity of possible interests among friends. To
                  address the above important need, this paper proposes
                  to automatically select and recommend potential
                  attendees of a social group activity, which could be
                  very useful for social networking websites as a
                  value-added service. We first formulate a new problem,
                  named Willingness mAximization for Social grOup (WASO).
                  This paper points out that the solution obtained by a
                  greedy algorithm is likely to be trapped in a local
                  optimal solution. Thus, we design a new randomized
                  algorithm to effectively and efficiently solve the
                  problem. Given the available computational budgets, the
                  proposed algorithm is able to optimally allocate the
                  resources and find a solution with an approximation
                  ratio. We implement the proposed algorithm in Facebook,
                  and the user study demonstrates that social groups
                  obtained by the proposed algorithm significantly
                  outperform the solutions manually configured by
                  users.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Cao:2013:HPS,
  author =       {Lei Cao and Elke A. Rundensteiner},
  title =        {High performance stream query processing with
                  correlation-aware partitioning},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {265--276},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {State-of-the-art optimizers produce one single optimal
                  query plan for all stream data, in spite of such a
                  singleton plan typically being sub-optimal or even poor
                  for highly correlated data. Recently a new stream
                  processing paradigm, called multi-route approach, has
                  emerged as a promising approach for tackling this
                  problem. Multi-route first divides data streams into
                  several partitions and then creates a separate query
                  plan for each combination of partitions. Unfortunately
                  current approaches suffer from severe shortcomings, in
                  particular, the lack of an effective partitioning
                  strategy and the prohibitive query optimization
                  expense. In this work we propose the first practical
                  multi-route optimizer named correlation-aware
                  multi-route stream query optimizer (or CMR) that solves
                  both problems. By exploiting both intra- and
                  inter-stream correlations of streams, CMR produces
                  effective partitions without having to undertake
                  repeated expensive query plan generation. The produced
                  partitions not only are best served by distinct optimal
                  query plans, but also leverage the partition-driven
                  pruning opportunity. Experimental results with both
                  synthetic and real life stream data confirm that CMR
                  outperforms the state-of-the-art solutions up to an
                  order of magnitude in both the query optimization time
                  and the run-time execution performance.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Difallah:2013:OBE,
  author =       "Djellel Eddine Difallah and Andrew Pavlo and Carlo
                 Curino and Philippe Cudr{\'e}-Mauroux",
  title =        "{OLTP-Bench}: an extensible testbed for benchmarking
                 relational databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "4",
  pages =        "277--288",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:02 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Benchmarking is an essential aspect of any database
                 management system (DBMS) effort. Despite several recent
                 advancements, such as pre-configured cloud database
                 images and database-as-a-service (DBaaS) offerings, the
                 deployment of a comprehensive testing platform with a
                 diverse set of datasets and workloads is still far from
                 being trivial. In many cases, researchers and
                 developers are limited to a small number of workloads
                 to evaluate the performance characteristics of their
                 work. This is due to the lack of a universal
                 benchmarking infrastructure, and to the difficulty of
                 gaining access to real data and workloads. This results
                 in lots of unnecessary engineering efforts and makes
                 the performance evaluation results difficult to
                 compare. To remedy these problems, we present
                 OLTP-Bench, an extensible ``batteries included'' DBMS
                 benchmarking testbed. The key contributions of
                 OLTP-Bench are its ease of use and extensibility,
                 support for tight control of transaction mixtures,
                 request rates, and access distributions over time, as
                 well as the ability to support all major DBMSs and
                 DBaaS platforms. Moreover, it is bundled with fifteen
                 workloads that all differ in complexity and system
                 demands, including four synthetic workloads, eight
                 workloads from popular benchmarks, and three workloads
                 that are derived from real-world applications. We
                 demonstrate through a comprehensive set of experiments
                 conducted on popular DBMS and DBaaS offerings the
                 different features provided by OLTP-Bench and the
                 effectiveness of our testbed in characterizing the
                 performance of database services.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Nandi:2013:GQS,
  author =       {Arnab Nandi and Lilong Jiang and Michael Mandel},
  title =        {Gestural query specification},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {289--300},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Direct, ad-hoc interaction with databases has
                  typically been performed over console-oriented
                  conversational interfaces using query languages such as
                  SQL. With the rise in popularity of gestural user
                  interfaces and computing devices that use gestures as
                  their exclusive modes of interaction, database query
                  interfaces require a fundamental rethinking to work
                  without keyboards. We present a novel query
                  specification system that allows the user to query
                  databases using a series of gestures. We present a
                  novel gesture recognition system that uses both the
                  interaction and the state of the database to classify
                  gestural input into relational database queries. We
                  conduct exhaustive systems performance tests and user
                  studies to demonstrate that our system is not only
                  performant and capable of interactive latencies, but it
                  is also more usable, faster to use and more intuitive
                  than existing systems.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Heise:2013:SDU,
  author =       {Arvid Heise and Jorge-Arnulfo Quian{\'e}-Ruiz and
                  Ziawasch Abedjan and Anja Jentzsch and Felix Naumann},
  title =        {Scalable discovery of unique column combinations},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {301--312},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {The discovery of all unique (and non-unique) column
                  combinations in a given dataset is at the core of any
                  data profiling effort. The results are useful for a
                  large number of areas of data management, such as
                  anomaly detection, data integration, data modeling,
                  duplicate detection, indexing, and query optimization.
                  However, discovering all unique and non-unique column
                  combinations is an NP-hard problem, which in principle
                  requires to verify an exponential number of column
                  combinations for uniqueness on all data values. Thus,
                  achieving efficiency and scalability in this context is
                  a tremendous challenge by itself. In this paper, we
                  devise Ducc, a scalable and efficient approach to the
                  problem of finding all unique and non-unique column
                  combinations in big datasets. We first model the
                  problem as a graph coloring problem and analyze the
                  pruning effect of individual combinations. We then
                  present our hybrid column-based pruning technique,
                  which traverses the lattice in a depth-first and random
                  walk combination. This strategy allows Ducc to
                  typically depend on the solution set size and hence to
                  prune large swaths of the lattice. Ducc also
                  incorporates row-based pruning to run uniqueness checks
                  in just few milliseconds. To achieve even higher
                  scalability, Ducc runs on several CPU cores (scale-up)
                  and compute nodes (scale-out) with a very low overhead.
                  We exhaustively evaluate Ducc using three datasets (two
                  real and one synthetic) with several millions rows and
                  hundreds of attributes. We compare Ducc with related
                  work: Gordian and HCA. The results show that Ducc is up
                  to more than 2 orders of magnitude faster than Gordian
                  and HCA (631x faster than Gordian and 398x faster than
                  HCA). Finally, a series of scalability experiments
                  shows the efficiency of Ducc to scale up and out.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Tang:2013:EMD,
  author =       {Yu Tang and Leong Hou U. and Yilun Cai and Nikos
                  Mamoulis and Reynold Cheng},
  title =        {{Earth Mover's Distance} based similarity search at
                  scale},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {313--324},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Earth Mover's Distance (EMD), as a similarity measure,
                  has received a lot of attention in the fields of
                  multimedia and probabilistic databases, computer
                  vision, image retrieval, machine learning, etc. EMD on
                  multidimensional histograms provides better
                  distinguishability between the objects approximated by
                  the histograms (e.g., images), compared to classic
                  measures like Euclidean distance. Despite its
                  usefulness, EMD has a high computational cost;
                  therefore, a number of effective filtering methods have
                  been proposed, to reduce the pairs of histograms for
                  which the exact EMD has to be computed, during
                  similarity search. Still, EMD calculations in the
                  refinement step remain the bottleneck of the whole
                  similarity search process. In this paper, we focus on
                  optimizing the refinement phase of EMD-based similarity
                  search by (i) adapting an efficient min-cost flow
                  algorithm (SIA) for EMD computation, (ii) proposing a
                  dynamic distance bound, which can be used to terminate
                  an EMD refinement early, and (iii) proposing a dynamic
                  refinement order for the candidates which, paired with
                  a concurrent EMD refinement strategy, reduces the
                  amount of needless computations. Our proposed
                  techniques are orthogonal to and can be easily
                  integrated with the state-of-the-art filtering
                  techniques, reducing the cost of EMD-based similarity
                  queries by orders of magnitude.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Parameswaran:2013:SVD,
  author =       {Aditya Parameswaran and Neoklis Polyzotis and Hector
                  Garcia-Molina},
  title =        {{SeeDB}: visualizing database queries efficiently},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {4},
  pages =        {325--328},
  month =        dec,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:02 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Data scientists rely on visualizations to interpret
                  the data returned by queries, but finding the right
                  visualization remains a manual task that is often
                  laborious. We propose a DBMS that partially automates
                  the task of finding the right visualizations for a
                  query. In a nutshell, given an input query Q, the new
                  DBMS optimizer will explore not only the space of
                  physical plans for Q, but also the space of possible
                  visualizations for the results of Q. The output will
                  comprise a recommendation of potentially
                  ``interesting'' or ``useful'' visualizations, where
                  each visualization is coupled with a suitable query
                  execution plan. We discuss the technical challenges in
                  building this system and outline an agenda for future
                  research.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Mahmoud:2014:MES,
  author =       {Hatem A. Mahmoud and Vaibhav Arora and Faisal Nawab
                  and Divyakant Agrawal and Amr {El Abbadi}},
  title =        {{MaaT}: effective and scalable coordination of
                  distributed transactions in the cloud},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {7},
  number =       {5},
  pages =        {329--340},
  month =        jan,
  year =         {2014},
  CODEN =        {????},
  ISSN =         {2150-8097},
  ISSN-L =       {2150-8097},
  bibdate =      {Wed Feb 4 09:22:04 MST 2015},
  bibsource =    {http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {The past decade has witnessed an increasing adoption
                  of cloud database technology, which provides better
                  scalability, availability, and fault-tolerance via
                  transparent partitioning and replication, and automatic
                  load balancing and fail-over. However, only a small
                  number of cloud databases provide strong consistency
                  guarantees for distributed transactions, despite
                  decades of research on distributed transaction
                  processing, due to practical challenges that arise in
                  the cloud setting, where failures are the norm, and
                  human administration is minimal. For example, dealing
                  with locks left by transactions initiated by failed
                  machines, and determining a multi-programming level
                  that avoids thrashing without under-utilizing available
                  resources, are some of the challenges that arise when
                  using lock-based transaction processing mechanisms in
                  the cloud context. Even in the case of optimistic
                  concurrency control, most proposals in the literature
                  deal with distributed validation but still require the
                  database to acquire locks during two-phase commit when
                  installing updates of a single transaction on multiple
                  machines. Very little theoretical work has been done to
                  entirely eliminate the need for locking in distributed
                  transactions, including locks acquired during two-phase
                  commit. In this paper, we re-design optimistic
                  concurrency control to eliminate any need for locking
                  even for atomic commitment, while handling the
                  practical issues in earlier theoretical work related to
                  this problem. We conduct an extensive experimental
                  study to evaluate our approach against lock-based
                  methods under various setups and workloads, and
                  demonstrate that our approach provides many practical
                  advantages in the cloud context.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Li:2014:DWA,
  author =       "Chao Li and Michael Hay and Gerome Miklau and Yue
                 Wang",
  title =        "A data- and workload-aware algorithm for range queries
                 under differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "341--352",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We describe a new algorithm for answering a given set
                 of range queries under $ \epsilon $-differential
                 privacy which often achieves substantially lower error
                 than competing methods. Our algorithm satisfies
                 differential privacy by adding noise that is adapted to
                 the input data and to the given query set. We first
                 privately learn a partitioning of the domain into
                 buckets that suit the input data well. Then we
                 privately estimate counts for each bucket, doing so in
                 a manner well-suited for the given query set. Since the
                 performance of the algorithm depends on the input
                 database, we evaluate it on a wide range of real
                 datasets, showing that we can achieve the benefits of
                 data-dependence on both ``easy'' and ``hard''
                 databases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Greco:2014:CQA,
  author =       "Sergio Greco and Fabian Pijcke and Jef Wijsen",
  title =        "Certain query answering in partially consistent
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "353--364",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A database is called uncertain if two or more tuples
                 of the same relation are allowed to agree on their
                 primary key. Intuitively, such tuples act as
                 alternatives for each other. A repair (or possible
                 world) of such uncertain database is obtained by
                 selecting a maximal number of tuples without ever
                 selecting two tuples of the same relation that agree on
                 their primary key. For a Boolean query $q$, the problem
                 $ {\rm CERTAINTY}(q)$ takes as input an uncertain
                 database db and asks whether $q$ evaluates to true on
                 every repair of db. In recent years, the complexity of
                 $ {\rm CERTAINTY}(q)$ has been studied under different
                 restrictions on $q$. These complexity studies have
                 assumed no restrictions on the uncertain databases that
                 are input to $ {\rm CERTAINTY}(q)$. In practice,
                 however, it may be known that these input databases are
                 partially consistent, in the sense that they satisfy
                 some dependencies (e.g., functional dependencies). In
                 this article, we introduce the problem $ {\rm
                 CERTAINTY}(q)$ in the presence of a set $ \Sigma $ of
                 dependencies. The problem $ {\rm CERTAINTY}(q, \Sigma)$
                 takes as input an uncertain database db that satisfies
                 $ \Sigma $, and asks whether every repair of db
                 satisfies $q$. We focus on the complexity of $ {\rm
                 CERTAINTY}(q, \Sigma)$ when $q$ is an acyclic
                 conjunctive query without self-join, and $ \Sigma $ is
                 a set of functional dependencies and join dependencies,
                 the latter of a particular form. We provide an
                 algorithm that, given $q$ and $ \Sigma $, decides
                 whether $ {\rm CERTAINTY}(q, \Sigma)$ is first-order
                 expressible. Moreover, we show how to effectively
                 construct a first-order definition of $ {\rm
                 CERTAINTY}(q, \Sigma)$ if it exists.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mottin:2014:EQG,
  author =       "Davide Mottin and Matteo Lissandrini and Yannis
                 Velegrakis and Themis Palpanas",
  title =        "Exemplar queries: give me an example of what you
                 need",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "365--376",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Search engines are continuously employing advanced
                 techniques that aim to capture user intentions and
                 provide results that go beyond the data that simply
                 satisfy the query conditions. Examples include the
                 personalized results, related searches, similarity
                 search, popular and relaxed queries. In this work we
                 introduce a novel query paradigm that considers a user
                 query as an example of the data in which the user is
                 interested. We call these queries exemplar queries and
                 claim that they can play an important role in dealing
                 with the information deluge. We provide a formal
                 specification of the semantics of such queries and show
                 that they are fundamentally different from notions like
                 queries by example, approximate and related queries. We
                 provide an implementation of these semantics for
                 graph-based data and present an exact solution with a
                 number of optimizations that improve performance
                 without compromising the quality of the answers. We
                 also provide an approximate solution that prunes the
                 search space and achieves considerably better
                 time-performance with minimal or no impact on
                 effectiveness. We experimentally evaluate the
                 effectiveness and efficiency of these solutions with
                 synthetic and real datasets, and illustrate the
                 usefulness of exemplar queries in practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Korula:2014:ERA,
  author =       "Nitish Korula and Silvio Lattanzi",
  title =        "An efficient reconciliation algorithm for social
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "377--388",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "People today typically use multiple online social
                 networks (Facebook, Twitter, Google+, LinkedIn, etc.).
                 Each online network represents a subset of their
                 ``real'' ego-networks. An interesting and challenging
                 problem is to reconcile these online networks, that is,
                 to identify all the accounts belonging to the same
                 individual. Besides providing a richer understanding of
                 social dynamics, the problem has a number of practical
                 applications. At first sight, this problem appears
                 algorithmically challenging. Fortunately, a small
                 fraction of individuals explicitly link their accounts
                 across multiple networks; our work leverages these
                 connections to identify a very large fraction of the
                 network. Our main contributions are to mathematically
                 formalize the problem for the first time, and to design
                 a simple, local, and efficient parallel algorithm to
                 solve it. We are able to prove strong theoretical
                 guarantees on the algorithm's performance on
                 well-established network models (Random Graphs,
                 Preferential Attachment). We also experimentally
                 confirm the effectiveness of the algorithm on synthetic
                 and real social network data sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chester:2014:CKR,
  author =       "Sean Chester and Alex Thomo and S. Venkatesh and Sue
                 Whitesides",
  title =        "Computing $k$-regret minimizing sets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "389--400",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Regret minimizing sets are a recent approach to
                 representing a dataset $D$ by a small subset $R$ of
                 size $r$ of representative data points. The set $R$ is
                 chosen such that executing any top-1 query on $R$
                 rather than $D$ is minimally perceptible to any user.
                 However, such a subset $R$ may not exist, even for
                 modest sizes, $r$. In this paper, we introduce the
                 relaxation to $k$-regret minimizing sets, whereby a
                 top-$1$ query on $R$ returns a result imperceptibly
                 close to the top-$k$ on $D$. We show that, in general,
                 with or without the relaxation, this problem is
                 NP-hard. For the specific case of two dimensions, we
                 give an efficient dynamic programming, plane sweep
                 algorithm based on geometric duality to find an optimal
                 solution. For arbitrary dimension, we give an
                 empirically effective, greedy, randomized algorithm
                 based on linear programming. With these algorithms, we
                 can find subsets $R$ of much smaller size that better
                 summarize $D$, using small values of $k$ larger than
                 $1$.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yu:2014:RTK,
  author =       "Adams Wei Yu and Nikos Mamoulis and Hao Su",
  title =        "Reverse top-$k$ search using random walk with
                 restart",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "401--412",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increasing popularity of social networks,
                 large volumes of graph data are becoming available.
                 Large graphs are also derived by structure extraction
                 from relational, text, or scientific data (e.g.,
                 relational tuple networks, citation graphs, ontology
                 networks, protein-protein interaction graphs).
                 Node-to-node proximity is the key building block for
                 many graph-based applications that search or analyze
                 the data. Among various proximity measures, random walk
                 with restart (RWR) is widely adopted because of its
                 ability to consider the global structure of the whole
                 network. Although RWR-based similarity search has been
                 well studied before, there is no prior work on reverse
                 top-$k$ proximity search in graphs based on RWR. We
                 discuss the applicability of this query and show that
                 its direct evaluation using existing methods on
                 RWR-based similarity search has very high computational
                 and storage demands. To address this issue, we propose
                 an indexing technique, paired with an on-line reverse
                 top-$k$ search algorithm. Our experiments show that our
                 technique is efficient and has manageable storage
                 requirements even when applied on very large graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Viglas:2014:WLS,
  author =       "Stratis D. Viglas",
  title =        "Write-limited sorts and joins for persistent memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "413--424",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To mitigate the impact of the widening gap between the
                 memory needs of CPUs and what standard memory
                 technology can deliver, system architects have
                 introduced a new class of memory technology termed
                 persistent memory. Persistent memory is
                 byte-addressable, but exhibits asymmetric I/O: writes
                 are typically one order of magnitude more expensive
                 than reads. Byte addressability combined with I/O
                 asymmetry render the performance profile of persistent
                 memory unique. Thus, it becomes imperative to find new
                 ways to seamlessly incorporate it into database
                 systems. We do so in the context of query processing.
                 We focus on the fundamental operations of sort and join
                 processing. We introduce the notion of write-limited
                 algorithms that effectively minimize the I/O cost. We
                 give a high-level API that enables the system to
                 dynamically optimize the workflow of the algorithms;
                 or, alternatively, allows the developer to tune the
                 write profile of the algorithms. We present four
                 different techniques to incorporate persistent memory
                 into the database processing stack in light of this
                 API. We have implemented and extensively evaluated all
                 our proposals. Our results show that the algorithms
                 deliver on their promise of I/O-minimality and tunable
                 performance. We showcase the merits and deficiencies of
                 each implementation technique, thus taking a solid
                 first step towards incorporating persistent memory into
                 query processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Anciaux:2014:FOD,
  author =       "N. Anciaux and L. Bouganim and T. Delot and S. Ilarri
                 and L. Kloul and N. Mitton and P. Pucheral",
  title =        "{Folk-IS}: opportunistic data services in least
                 developed countries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "5",
  pages =        "425--428",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:04 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "According to a wide range of studies, IT should become
                 a key facilitator in establishing primary education,
                 reducing mortality and supporting commercial
                 initiatives in Least Developed Countries (LDCs). The
                 main barrier to the development of IT services in these
                 regions is not only the lack of communication
                 facilities, but also the lack of consistent information
                 systems, security procedures, economic and legal
                 support, as well as political commitment. In this
                 paper, we propose the vision of an infrastructureless
                 data platform well suited for the development of
                 innovative IT services in LDCs. We propose a
                 participatory approach, where each individual
                 implements a small subset of a complete information
                 system thanks to highly secure, portable and low-cost
                 personal devices as well as opportunistic networking,
                 without the need of any form of infrastructure. We
                 review the technical challenges that are specific to
                 this approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Giannikis:2014:SWO,
  author =       "Georgios Giannikis and Darko Makreshanski and Gustavo
                 Alonso and Donald Kossmann",
  title =        "Shared workload optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "6",
  pages =        "429--440",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:06 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As a result of increases in both the query load and
                 the data managed, as well as changes in hardware
                 architecture (multicore), the last years have seen a
                 shift from query-at-a-time approaches towards shared
                 work (SW) systems where queries are executed in groups.
                 Such groups share operators like scans and joins,
                 leading to systems that process hundreds to thousands
                 of queries in one go. SW systems range from storage
                 engines that use in-memory co-operative scans to more
                 complex query processing engines that share joins over
                 analytical and star schema queries. In all cases, they
                 rely on either single query optimizers, predicate
                 sharing, or on manually generated plans. In this paper
                 we explore the problem of shared workload optimization
                 (SWO) for SW systems. The challenge in doing so is that
                 the optimization has to be done for the entire workload
                 and that results in a class of stochastic knapsack with
                 uncertain weights optimization, which can only be
                 addressed with heuristics to achieve a reasonable
                 runtime. In this paper we focus on hash joins and
                 shared scans and present a first algorithm capable of
                 optimizing the execution of entire workloads by
                 deriving a global executing plan for all the queries in
                 the system. We evaluate the optimizer over the TPC-W
                 and the TPC-H benchmarks. The results prove the
                 feasibility of this approach and demonstrate the
                 performance gains that can be obtained from SW
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elseidy:2014:SAO,
  author =       "Mohammed Elseidy and Abdallah Elguindy and Aleksandar
                 Vitorovic and Christoph Koch",
  title =        "Scalable and adaptive online joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "6",
  pages =        "441--452",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:06 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Scalable join processing in a parallel shared-nothing
                 environment requires a partitioning policy that evenly
                 distributes the processing load while minimizing the
                 size of state maintained and number of messages
                 communicated. Previous research proposes static
                 partitioning schemes that require statistics
                 beforehand. In an online or streaming environment in
                 which no statistics about the workload are known,
                 traditional static approaches perform poorly. This
                 paper presents a novel parallel online dataflow join
                 operator that supports arbitrary join predicates. The
                 proposed operator continuously adjusts itself to the
                 data dynamics through adaptive dataflow routing and
                 state repartitioning. The operator is resilient to data
                 skew, maintains high throughput rates, avoids blocking
                 behavior during state repartitioning, takes an eventual
                 consistency approach for maintaining its local state,
                 and behaves strongly consistently as a black-box
                 dataflow operator. We prove that the operator ensures a
                 constant competitive ratio 3.75 in data distribution
                 optimality and that the cost of processing an input
                 tuple is amortized constant, taking into account
                 adaptivity costs. Our evaluation demonstrates that our
                 operator outperforms the state-of-the-art static
                 partitioning schemes in resource utilization,
                 throughput, and execution time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Morton:2014:SDE,
  author =       "Kristi Morton and Magdalena Balazinska and Dan
                 Grossman and Jock Mackinlay",
  title =        "Support the data enthusiast: challenges for
                 next-generation data-analysis systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "6",
  pages =        "453--456",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:06 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a vision of next-generation visual
                 analytics services. We argue that these services should
                 have three related capabilities: support visual and
                 interactive data exploration as they do today, but also
                 suggest relevant data to enrich visualizations, and
                 facilitate the integration and cleaning of that data.
                 Most importantly, they should provide all these
                 capabilities seamlessly in the context of an
                 uninterrupted data analysis cycle. We present the
                 challenges and opportunities in building
                 next-generation visual analytics services.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2014:PFD,
  author =       "Daniel Deutch and Yuval Moskovitch and Val Tannen",
  title =        "A provenance framework for data-dependent process
                 analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "6",
  pages =        "457--468",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:06 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A data-dependent process (DDP) models an application
                 whose control flow is guided by a finite state machine,
                 as well as by the state of an underlying database. DDPs
                 are commonly found e.g., in e-commerce. In this paper
                 we develop a framework supporting the use of provenance
                 in static (temporal) analysis of possible DDP
                 executions. Using provenance support, analysts can
                 interactively test and explore the effect of
                 hypothetical modifications to a DDP's state machine
                 and/or to the underlying database. They can also extend
                 the analysis to incorporate the propagation of
                 annotations from meta-domains of interest, e.g., cost
                 or access privileges. Toward this goal we note that the
                 framework of semiring-based provenance was proven
                 highly effective in fulfilling similar needs in the
                 context of database queries. In this paper we consider
                 novel constructions that generalize the semiring
                 approach to the context of DDP analysis. These
                 constructions address two interacting new challenges:
                 (1) to combine provenance annotations for both
                 information that resides in the database and
                 information about external inputs (e.g., user choices),
                 and (2) to finitely capture infinite process
                 executions. We analyze our solution from theoretical
                 and experimental perspectives, proving its
                 effectiveness.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chiang:2014:TED,
  author =       "Yueh-Hsuan Chiang and AnHai Doan and Jeffrey F.
                 Naughton",
  title =        "Tracking entities in the dynamic world: a fast
                 algorithm for matching temporal records",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "6",
  pages =        "469--480",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:06 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Identifying records referring to the same real world
                 entity over time enables longitudinal data analysis.
                 However, difficulties arise from the dynamic nature of
                 the world: the entities described by a temporal data
                 set often evolve their states over time. While the
                 state of the art approach to temporal entity matching
                 achieves high accuracy, this approach is
                 computationally expensive and cannot handle large data
                 sets. In this paper, we present an approach that
                 achieves equivalent matching accuracy but takes far
                 less time. Our key insight is ``static first, dynamic
                 second.'' Our approach first runs an
                 evidence-collection pass, grouping records without
                 considering the possibility of entity evolution, as if
                 the world were ``static.'' Then, it merges clusters
                 from the initial grouping by determining whether an
                 entity might evolve from the state described in one
                 cluster to the state described in another cluster. This
                 intuitively reduces a difficult problem, record
                 matching with evolution, to two simpler problems:
                 record matching without evolution, then ``evolution
                 detection'' among the resulting clusters. Experimental
                 results on several temporal data sets show that our
                 approach provides an order of magnitude improvement in
                 run time over the state-of-the-art approach while
                 producing equivalent matching accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Conway:2014:EAS,
  author =       "Neil Conway and Peter Alvaro and Emily Andrews and
                 Joseph M. Hellerstein",
  title =        "{Edelweiss}: automatic storage reclamation for
                 distributed programming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "6",
  pages =        "481--492",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:06 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Event Log Exchange (ELE) is a common programming
                 pattern based on immutable state and messaging. ELE
                 sidesteps traditional challenges in distributed
                 consistency, at the expense of introducing new
                 challenges in designing space reclamation protocols to
                 avoid consuming unbounded storage. We introduce
                 Edelweiss, a sublanguage of Bloom that provides an ELE
                 programming model, yet automatically reclaims space
                 without programmer assistance. We describe techniques
                 to analyze Edelweiss programs and automatically
                 generate application-specific distributed space
                 reclamation logic. We show how Edelweiss can be used to
                 elegantly implement a variety of communication and
                 distributed storage protocols; the storage reclamation
                 code generated by Edelweiss effectively
                 garbage-collects state and often matches hand-written
                 protocols from the literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ntarmos:2014:RJQ,
  author =       "Nikos Ntarmos and Ioannis Patlakas and Peter
                 Triantafillou",
  title =        "Rank join queries in {NoSQL} databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "493--504",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Rank (i.e., top-$k$) join queries play a key role in
                 modern analytics tasks. However, despite their
                 importance and unlike centralized settings, they have
                 been completely overlooked in cloud NoSQL settings. We
                 attempt to fill this gap: We contribute a suite of
                 solutions and study their performance comprehensively.
                 Baseline solutions are offered using SQL-like languages
                 (like Hive and Pig), based on MapReduce jobs. We first
                 provide solutions that are based on specialized
                 indices, which may themselves be accessed using either
                 MapReduce or coordinator-based strategies. The first
                 index-based solution is based on inverted indices,
                 which are accessed with MapReduce jobs. The second
                 index-based solution adapts a popular centralized
                 rank-join algorithm. We further contribute a novel
                 statistical structure comprising histograms and Bloom
                 filters, which forms the basis for the third
                 index-based solution. We provide (i) MapReduce
                 algorithms showing how to build these indices and
                 statistical structures, (ii) algorithms to allow for
                 online updates to these indices, and (iii) query
                 processing algorithms utilizing them. We implemented
                 all algorithms in Hadoop (HDFS) and HBase and tested
                 them on TPC-H datasets of various scales, utilizing
                 different queries on tables of various sizes and
                 different score-attribute distributions. We ported our
                 implementations to Amazon EC2 and ``in-house'' lab
                 clusters of various scales. We provide performance
                 results for three metrics: query execution time,
                 network bandwidth consumption, and dollar-cost for
                 query execution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gupta:2014:BOS,
  author =       "Rahul Gupta and Alon Halevy and Xuezhi Wang and Steven
                 Euijong Whang and Fei Wu",
  title =        "{Biperpedia}: an ontology for search applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "505--516",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Search engines make significant efforts to recognize
                 queries that can be answered by structured data and
                 invest heavily in creating and maintaining
                 high-precision databases. While these databases have a
                 relatively wide coverage of entities, the number of
                 attributes they model (e.g., GDP, CAPITAL, ANTHEM) is
                 relatively small. Extending the number of attributes
                 known to the search engine can enable it to more
                 precisely answer queries from the long and heavy tail,
                 extract a broader range of facts from the Web, and
                 recover the semantics of tables on the Web. We describe
                 Biperpedia, an ontology with 1.6M (class, attribute)
                 pairs and 67K distinct attribute names. Biperpedia
                 extracts attributes from the query stream, and then
                 uses the best extractions to seed attribute extraction
                 from text. For every attribute Biperpedia saves a set
                 of synonyms and text patterns in which it appears,
                 thereby enabling it to recognize the attribute in more
                 contexts. In addition to a detailed analysis of the
                 quality of Biperpedia, we show that it can increase the
                 number of Web tables whose semantics we can recover by
                 more than a factor of 4 compared with Freebase.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elseidy:2014:GFS,
  author =       "Mohammed Elseidy and Ehab Abdelhamid and Spiros
                 Skiadopoulos and Panos Kalnis",
  title =        "{GraMi}: frequent subgraph and pattern mining in a
                 single large graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "517--528",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Mining frequent subgraphs is an important operation on
                 graphs; it is defined as finding all subgraphs that
                 appear frequently in a database according to a given
                 frequency threshold. Most existing work assumes a
                 database of many small graphs, but modern applications,
                 such as social networks, citation graphs, or
                 protein-protein interactions in bioinformatics, are
                 modeled as a single large graph. In this paper we
                 present GraMi, a novel framework for frequent subgraph
                 mining in a single large graph. GraMi undertakes a
                 novel approach that only finds the minimal set of
                 instances to satisfy the frequency threshold and avoids
                 the costly enumeration of all instances required by
                 previous approaches. We accompany our approach with a
                 heuristic and optimizations that significantly improve
                 performance. Additionally, we present an extension of
                 GraMi that mines frequent patterns. Compared to
                 subgraphs, patterns offer a more powerful version of
                 matching that captures transitive interactions between
                 graph nodes (like friend of a friend) which are very
                 common in modern applications. Finally, we present
                 CGraMi, a version supporting structural and semantic
                 constraints, and AGraMi, an approximate version
                 producing results with no false positives. Our
                 experiments on real data demonstrate that our framework
                 is up to 2 orders of magnitude faster and discovers
                 more interesting patterns than existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2014:LIO,
  author =       "Sheng Wang and David Maier and Beng Chin Ooi",
  title =        "Lightweight indexing of observational data in
                 log-structured storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "529--540",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Huge amounts of data are being generated by sensing
                 devices every day, recording the status of objects and
                 the environment. Such observational data is widely used
                 in scientific research. As the capabilities of sensors
                 keep improving, the data produced are drastically
                 expanding in precision and quantity, making it a
                 write-intensive domain. Log-structured storage is
                 capable of providing high write throughput, and hence
                 is a natural choice for managing large-scale
                 observational data. In this paper, we propose an
                 approach to indexing and querying observational data in
                 log-structured storage. Based on key traits of
                 observational data, we design a novel index approach
                 called the CR-index (Continuous Range Index), which
                 provides fast query performance without compromising
                 write throughput. It is a lightweight structure that is
                 fast to construct and often small enough to reside in
                 RAM. Our experimental results show that the CR-index is
                 superior in handling observational data compared to
                 other indexing techniques. While our focus is
                 scientific data, we believe our index will be effective
                 for other applications with similar properties, such as
                 process monitoring in manufacturing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2014:EES,
  author =       "Dawei Jiang and Gang Chen and Beng Chin Ooi and
                 Kian-Lee Tan and Sai Wu",
  title =        "{epiC}: an extensible and scalable system for
                 processing big data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "541--552",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Big Data problem is characterized by the so-called
                 3V features: Volume --- a huge amount of data, Velocity
                 --- a high data ingestion rate, and Variety --- a mix
                 of structured data, semi-structured data, and
                 unstructured data. The state-of-the-art solutions to
                 the Big Data problem are largely based on the MapReduce
                 framework (aka its open source implementation Hadoop).
                 Although Hadoop handles the data volume challenge
                 successfully, it does not deal with the data variety
                 well since the programming interfaces and its
                 associated data processing model is inconvenient and
                 inefficient for handling structured data and graph
                 data. This paper presents epiC, an extensible system to
                 tackle the Big Data's data variety challenge. epiC
                 introduces a general Actor-like concurrent programming
                 model, independent of the data processing models, for
                 specifying parallel computations. Users process
                 multi-structured datasets with appropriate epiC
                 extensions, the implementation of a data processing
                 model best suited for the data type and auxiliary code
                 for mapping that data processing model into epiC's
                 concurrent programming model. Like Hadoop, programs
                 written in this way can be automatically parallelized
                 and the runtime system takes care of fault tolerance
                 and inter-machine communications. We present the design
                 and implementation of epiC's concurrent programming
                 model. We also present two customized data processing
                  models, an optimized MapReduce extension and a
                 relational model, on top of epiC. Experiments
                 demonstrate the effectiveness and efficiency of our
                 proposed epiC.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Boehm:2014:HPS,
  author =       "Matthias Boehm and Shirish Tatikonda and Berthold
                 Reinwald and Prithviraj Sen and Yuanyuan Tian and
                 Douglas R. Burdick and Shivakumar Vaithyanathan",
  title =        "Hybrid parallelization strategies for large-scale
                 machine learning in {SystemML}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "553--564",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SystemML aims at declarative, large-scale machine
                 learning (ML) on top of MapReduce, where high-level ML
                 scripts with R-like syntax are compiled to programs of
                 MR jobs. The declarative specification of ML algorithms
                 enables --- in contrast to existing large-scale machine
                 learning libraries --- automatic optimization.
                 SystemML's primary focus is on data parallelism but
                 many ML algorithms inherently exhibit opportunities for
                 task parallelism as well. A major challenge is how to
                 efficiently combine both types of parallelism for
                 arbitrary ML scripts and workloads. In this paper, we
                 present a systematic approach for combining task and
                 data parallelism for large-scale machine learning on
                 top of MapReduce. We employ a generic Parallel FOR
                 construct (ParFOR) as known from high performance
                 computing (HPC). Our core contributions are (1)
                 complementary parallelization strategies for exploiting
                 multi-core and cluster parallelism, as well as (2) a
                 novel cost-based optimization framework for
                 automatically creating optimal parallel execution
                 plans. Experiments on a variety of use cases showed
                 that this achieves both efficiency and scalability due
                 to automatic adaptation to ad-hoc workloads and unknown
                 data characteristics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2014:SSG,
  author =       "Shengqi Yang and Yinghui Wu and Huan Sun and Xifeng
                 Yan",
  title =        "Schemaless and structureless graph querying",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "565--576",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Querying complex graph databases such as knowledge
                 graphs is a challenging task for non-professional
                 users. Due to their complex schemas and variational
                 information descriptions, it becomes very hard for
                 users to formulate a query that can be properly
                 processed by the existing systems. We argue that for a
                 user-friendly graph query engine, it must support
                 various kinds of transformations such as synonym,
                 abbreviation, and ontology. Furthermore, the derived
                 query results must be ranked in a principled manner. In
                 this paper, we introduce a novel framework enabling
                 schemaless and structureless graph querying (SLQ),
                 where a user need not describe queries precisely as
                 required by most databases. The query engine is built
                 on a set of transformation functions that automatically
                 map keywords and linkages from a query to their matches
                 in a graph. It automatically learns an effective
                 ranking model, without assuming manually labeled
                 training examples, and can efficiently return top
                 ranked matches using graph sketch and belief
                 propagation. The architecture of SLQ is elastic for
                 ``plug-in'' new transformation functions and query
                 logs. Our experimental results show that this new graph
                 querying paradigm is promising: It identifies
                 high-quality matches for both keyword and graph queries
                 over real-life knowledge graphs, and outperforms
                 existing methods significantly in terms of
                 effectiveness and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Salihoglu:2014:OGA,
  author =       "Semih Salihoglu and Jennifer Widom",
  title =        "Optimizing graph algorithms on {Pregel}-like systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "577--588",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of implementing graph algorithms
                 efficiently on Pregel-like systems, which can be
                 surprisingly challenging. Standard graph algorithms in
                 this setting can incur unnecessary inefficiencies such
                 as slow convergence or high communication or
                 computation cost, typically due to structural
                 properties of the input graphs such as large diameters
                 or skew in component sizes. We describe several
                 optimization techniques to address these
                 inefficiencies. Our most general technique is based on
                 the idea of performing some serial computation on a
                 tiny fraction of the input graph, complementing
                 Pregel's vertex-centric parallelism. We base our study
                 on thorough implementations of several fundamental
                 graph algorithms, some of which have, to the best of
                 our knowledge, not been implemented on Pregel-like
                 systems before. The algorithms and optimizations we
                 describe are fully implemented in our open-source
                 Pregel implementation. We present detailed experiments
                 showing that our optimization techniques improve
                 runtime significantly on a variety of very large graph
                 datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2014:TCF,
  author =       "You Wu and Pankaj K. Agarwal and Chengkai Li and Jun
                 Yang and Cong Yu",
  title =        "Toward computational fact-checking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "7",
  pages =        "589--600",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:07 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Our news are saturated with claims of ``facts'' made
                 from data. Database research has in the past focused on
                 how to answer queries, but has not devoted much
                 attention to discerning more subtle qualities of the
                 resulting claims, e.g., is a claim ``cherry-picking''?
                 This paper proposes a framework that models claims
                 based on structured data as parameterized queries. A
                 key insight is that we can learn a lot about a claim by
                 perturbing its parameters and seeing how its conclusion
                 changes. This framework lets us formulate practical
                 fact-checking tasks --- reverse-engineering (often
                 intentionally) vague claims, and countering
                 questionable claims --- as computational problems.
                 Along with the modeling framework, we develop an
                 algorithmic framework that enables efficient
                 instantiations of ``meta'' algorithms by supplying
                 appropriate algorithmic building blocks. We present
                 real-world examples and experiments that demonstrate
                 the power of our model, efficiency of our algorithms,
                 and usefulness of their results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arenas:2014:PAB,
  author =       "Marcelo Arenas and Gonzalo D{\'\i}az and Achille
                 Fokoue and Anastasios Kementsietsidis and Kavitha
                 Srinivas",
  title =        "A principled approach to bridging the gap between
                 graph data and their schemas",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "8",
  pages =        "601--612",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:10 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Although RDF graph data often come with an associated
                 schema, recent studies have proven that real RDF data
                 rarely conform to their perceived schemas. Since a
                 number of data management decisions, including storage
                 layouts, indexing, and efficient query processing, use
                 schemas to guide the decision making, it is imperative
                 to have an accurate description of the structuredness
                 of the data at hand (how well the data conform to the
                 schema). In this paper, we have approached the study of
                 the structuredness of an RDF graph in a principled way:
                 we propose a framework for specifying structuredness
                 functions, which gauge the degree to which an RDF graph
                 conforms to a schema. In particular, we first define a
                 formal language for specifying structuredness functions
                 with expressions we call rules. This language allows a
                 user to state a rule to which an RDF graph may fully or
                 partially conform. Then we consider the issue of
                 discovering a refinement of a sort (type) by
                 partitioning the dataset into subsets whose
                 structuredness is over a specified threshold. In
                 particular, we prove that the natural decision problem
                 associated to this refinement problem is NP-complete,
                 and we provide a natural translation of this problem
                 into Integer Linear Programming (ILP). Finally, we test
                 this ILP solution with three real world datasets and
                 three different and intuitive rules, which gauge the
                 structuredness in different ways. We show that the
                 rules give meaningful refinements of the datasets,
                 showing that our language can be a powerful tool for
                 understanding the structure of RDF data, and we show
                 that the ILP solution is practical for a large fraction
                 of existing data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:EPS,
  author =       "Dongxiang Zhang and Chee-Yong Chan and Kian-Lee Tan",
  title =        "An efficient publish\slash subscribe index for
                 e-commerce databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "8",
  pages =        "613--624",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:10 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many of today's publish/subscribe (pub/sub) systems
                 have been designed to cope with a large volume of
                 subscriptions and high event arrival rate (velocity).
                 However, in many novel applications (such as
                 e-commerce), there is an increasing variety of items,
                 each with different attributes. This leads to a very
                 high-dimensional and sparse database that existing
                 pub/sub systems can no longer support effectively. In
                 this paper, we propose an efficient in-memory index
                 that is scalable to the volume and update of
                 subscriptions, the arrival rate of events and the
                 variety of subscribable attributes. The index is also
                 extensible to support complex scenarios such as
                 prefix/suffix filtering and regular expression
                 matching. We conduct extensive experiments on synthetic
                 datasets and two real datasets (AOL query log and Ebay
                 products). The results demonstrate the superiority of
                 our index over state-of-the-art methods: our index
                 incurs orders of magnitude less index construction
                 time, consumes a small amount of memory and performs
                 event matching efficiently.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2014:SSJ,
  author =       "Yu Jiang and Guoliang Li and Jianhua Feng and Wen-Syan
                 Li",
  title =        "String similarity joins: an experimental evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "8",
  pages =        "625--636",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:10 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "String similarity join is an important operation in
                 data integration and cleansing that finds similar
                 string pairs from two collections of strings. More than
                 ten algorithms have been proposed to address this
                 problem in the recent two decades. However, existing
                 algorithms have not been thoroughly compared under the
                 same experimental framework. For example, some
                 algorithms are tested only on specific datasets. This
                 makes it rather difficult for practitioners to decide
                 which algorithms should be used for various scenarios.
                 To address this problem, in this paper we provide a
                 comprehensive survey on a wide spectrum of existing
                 string similarity join algorithms, classify them into
                 different categories based on their main techniques,
                 and compare them through extensive experiments on a
                 variety of real-world datasets with different
                 characteristics. We also report comprehensive findings
                 obtained from the experiments and provide new insights
                 about the strengths and weaknesses of existing
                 similarity join algorithms which can guide
                 practitioners to select appropriate algorithms for
                 various scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Proserpio:2014:CDS,
  author =       "Davide Proserpio and Sharon Goldberg and Frank
                 McSherry",
  title =        "Calibrating data to sensitivity in private data
                 analysis: a platform for differentially-private
                 analysis of weighted datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "8",
  pages =        "637--648",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:10 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present an approach to differentially private
                 computation in which one does not scale up the
                 magnitude of noise for challenging queries, but rather
                 scales down the contributions of challenging records.
                 While scaling down all records uniformly is equivalent
                 to scaling up the noise magnitude, we show that scaling
                 records non-uniformly can result in substantially
                 higher accuracy by bypassing the worst-case
                 requirements of differential privacy for the noise
                 magnitudes. This paper details the data analysis
                 platform wPINQ, which generalizes the Privacy
                 Integrated Query (PINQ) to weighted datasets. Using a
                 few simple operators (including a non-uniformly scaling
                 Join operator) wPINQ can reproduce (and improve)
                 several recent results on graph analysis and introduce
                 new generalizations (e.g., counting triangles with
                 given degrees). We also show how to integrate
                 probabilistic inference techniques to synthesize
                 datasets respecting more complicated (and less easily
                 interpreted) measurements.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2014:EMM,
  author =       "Wei Wang and Beng Chin Ooi and Xiaoyan Yang and
                 Dongxiang Zhang and Yueting Zhuang",
  title =        "Effective multi-modal retrieval based on stacked
                 auto-encoders",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "8",
  pages =        "649--660",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 09:22:10 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multi-modal retrieval is emerging as a new search
                 paradigm that enables seamless information retrieval
                 from various types of media. For example, users can
                 simply snap a movie poster to search relevant reviews
                 and trailers. To solve the problem, a set of mapping
                 functions are learned to project high-dimensional
                 features extracted from data of different media types
                 into a common low-dimensional space so that metric
                 distance measures can be applied. In this paper, we
                 propose an effective mapping mechanism based on deep
                 learning (i.e., stacked auto-encoders) for multi-modal
                 retrieval. Mapping functions are learned by optimizing
                 a new objective function, which captures both
                 intra-modal and inter-modal semantic relationships of
                 data from heterogeneous sources effectively. Compared
                 with previous works which require a substantial amount
                 of prior knowledge such as similarity matrices of
                 intra-modal data and ranking examples, our method
                 requires little prior knowledge. Given a large training
                 dataset, we split it into mini-batches and continually
                 adjust the mapping functions for each batch of input.
                 Hence, our method is memory efficient with respect to
                 the data volume. Experiments on three real datasets
                 illustrate that our proposed method achieves
                 significant improvement in search accuracy over the
                 state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Song:2014:PNF,
  author =       "Renchu Song and Weiwei Sun and Baihua Zheng and Yu
                 Zheng",
  title =        "{PRESS}: a novel framework of trajectory compression
                 in road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "661--672",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Location data becomes more and more important. In this
                 paper, we focus on the trajectory data, and propose a
                 new framework, namely PRESS (Paralleled
                 Road-Network-Based Trajectory Compression), to
                 effectively compress trajectory data under road network
                 constraints. Different from existing work, PRESS
                 proposes a novel representation for trajectories to
                 separate the spatial representation of a trajectory
                 from the temporal representation, and proposes a Hybrid
                 Spatial Compression (HSC) algorithm and error Bounded
                 Temporal Compression (BTC) algorithm to compress the
                 spatial and temporal information of trajectories
                 respectively. PRESS also supports common
                 spatial-temporal queries without fully decompressing
                 the data. Through an extensive experimental study on
                 real trajectory dataset, PRESS significantly
                 outperforms existing approaches in terms of saving
                 storage cost of trajectory data with bounded errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2014:FCO,
  author =       "Yajun Yang and Hong Gao and Jeffrey Xu Yu and
                 Jianzhong Li",
  title =        "Finding the cost-optimal path with time constraint
                 over time-dependent graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "673--684",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Shortest path query is an important problem and has
                 been well studied in static graphs. However, in
                 practice, the costs of edges in graphs always change
                 over time. We call such graphs as time-dependent
                 graphs. In this paper, we study how to find a
                 cost-optimal path with time constraint in
                 time-dependent graphs. Most existing works regarding
                 the Time-Dependent Shortest Path (TDSP) problem focus
                 on finding a shortest path with the minimum travel
                 time. All these works are based on the following fact:
                 the earliest arrival time at a vertex $v$ can be
                 derived from the earliest arrival time at $v$'s
                 neighbors. Unfortunately, this fact does not hold for
                 our problem. In this paper, we propose a novel
                 algorithm to compute a cost-optimal path with time
                 constraint in time-dependent graphs. We show that the
                 time and space complexities of our algorithm are $ O(k
                 n \log n + m k)$ and $ O((n + m) k)$ respectively. We
                 confirm the effectiveness and efficiency of our
                 algorithm through conducting experiments on real
                 datasets with synthetic cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Parameswaran:2014:OCP,
  author =       "Aditya Parameswaran and Stephen Boyd and Hector
                 Garcia-Molina and Ashish Gupta and Neoklis Polyzotis
                 and Jennifer Widom",
  title =        "Optimal crowd-powered rating and filtering
                 algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "685--696",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We focus on crowd-powered filtering, i.e., filtering a
                 large set of items using humans. Filtering is one of
                 the most commonly used building blocks in crowdsourcing
                 applications and systems. While solutions for
                 crowd-powered filtering exist, they make a range of
                 implicit assumptions and restrictions, ultimately
                 rendering them not powerful enough for real-world
                 applications. We describe two approaches to discard
                 these implicit assumptions and restrictions: one, that
                 carefully generalizes prior work, leading to an
                 optimal, but often-times intractable solution, and
                 another, that provides a novel way of reasoning about
                 filtering strategies, leading to a sometimes
                 suboptimal, but efficiently computable solution (that
                 is asymptotically close to optimal). We demonstrate
                 that our techniques lead to significant reductions in
                 error of up to 30\% for fixed cost over prior work in a
                 novel crowdsourcing application: peer evaluation in
                 online courses.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gruenheid:2014:IRL,
  author =       "Anja Gruenheid and Xin Luna Dong and Divesh
                 Srivastava",
  title =        "Incremental record linkage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "697--708",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Record linkage clusters records such that each cluster
                 corresponds to a single distinct real-world entity. It
                 is a crucial step in data cleaning and data
                 integration. In the big data era, the velocity of data
                 updates is often high, quickly making previous linkage
                 results obsolete. This paper presents an end-to-end
                 framework that can incrementally and efficiently update
                 linkage results when data updates arrive. Our
                 algorithms not only allow merging records in the
                 updates with existing clusters, but also allow
                 leveraging new evidence from the updates to fix
                 previous linkage errors. Experimental results on three
                 real and synthetic data sets show that our algorithms
                 can significantly reduce linkage time without
                 sacrificing linkage quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Roy:2014:LLH,
  author =       "Pratanu Roy and Jens Teubner and Rainer Gemulla",
  title =        "Low-latency handshake join",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "709--720",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This work revisits the processing of stream joins on
                 modern hardware architectures. Our work is based on the
                 recently proposed handshake join algorithm, which is a
                 mechanism to parallelize the processing of stream joins
                 in a NUMA-aware and hardware-friendly manner. Handshake
                 join achieves high throughput and scalability, but it
                 suffers from a high latency penalty and a
                 non-deterministic ordering of the tuples in the
                 physical result stream. In this paper, we first
                 characterize the latency behavior of the handshake join
                 and then propose a new low-latency handshake join
                 algorithm, which substantially reduces latency without
                 sacrificing throughput or scalability. We also present
                 a technique to generate punctuated result streams with
                 very little overhead; such punctuations allow the
                 generation of correctly ordered physical output streams
                 with negligible effect on overall throughput and
                 latency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2014:PPT,
  author =       "Huanhuan Wu and James Cheng and Silu Huang and Yiping
                 Ke and Yi Lu and Yanyan Xu",
  title =        "Path problems in temporal graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "721--732",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Shortest path is a fundamental graph problem with
                 numerous applications. However, the concept of classic
                 shortest path is insufficient or even flawed in a
                 temporal graph, as the temporal information determines
                 the order of activities along any path. In this paper,
                 we show the shortcomings of classic shortest path in a
                 temporal graph, and study various concepts of
                 ``shortest'' path for temporal graphs. Computing these
                 temporal paths is challenging as subpaths of a
                 ``shortest'' path may not be ``shortest'' in a temporal
                 graph. We investigate properties of the temporal paths
                 and propose efficient algorithms to compute them. We
                 tested our algorithms on real world temporal graphs to
                 verify their efficiency, and also show that temporal
                 paths are essential for studying temporal graphs by
                 comparing shortest paths in normal static graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2014:RRI,
  author =       "Xin Cao and Gao Cong and Christian S. Jensen and Man
                 Lung Yiu",
  title =        "Retrieving regions of interest for user exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "733--744",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We consider an application scenario where points of
                 interest (PoIs) each have a web presence and where a
                 web user wants to identify a region that contains
                 relevant PoIs that are relevant to a set of keywords,
                 e.g., in preparation for deciding where to go to
                 conveniently explore the PoIs. Motivated by this, we
                 propose the length-constrained maximum-sum region
                 (LCMSR) query that returns a spatial-network region
                 that is located within a general region of interest,
                 that does not exceed a given size constraint, and that
                 best matches query keywords. Such a query maximizes the
                 total weight of the PoIs in it w.r.t. the query
                 keywords. We show that it is NP-hard to answer this
                  query. We develop an approximation algorithm with a
                  $(5 + \epsilon)$ approximation ratio utilizing a technique
                 that scales node weights into integers. We also propose
                 a more efficient heuristic algorithm and a greedy
                 algorithm. Empirical studies on real data offer
                 detailed insight into the accuracy of the proposed
                 algorithms and show that the proposed algorithms are
                 capable of computing results efficiently and
                 effectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2014:SLE,
  author =       "Yingfan Liu and Jiangtao Cui and Zi Huang and Hui Li
                 and Heng Tao Shen",
  title =        "{SK-LSH}: an efficient index structure for
                 approximate nearest neighbor search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "745--756",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Approximate Nearest Neighbor (ANN) search in high
                 dimensional space has become a fundamental paradigm in
                 many applications. Recently, Locality Sensitive Hashing
                 (LSH) and its variants are acknowledged as the most
                 promising solutions to ANN search. However,
                 state-of-the-art LSH approaches suffer from a drawback:
                 accesses to candidate objects require a large number of
                 random I/O operations. In order to guarantee the
                 quality of returned results, sufficient objects should
                 be verified, which would consume enormous I/O cost. To
                 address this issue, we propose a novel method, called
                 SortingKeys-LSH (SK-LSH), which reduces the number of
                 page accesses through locally arranging candidate
                 objects. We firstly define a new measure to evaluate
                 the distance between the compound hash keys of two
                 points. A linear order relationship on the set of
                 compound hash keys is then created, and the
                 corresponding data points can be sorted accordingly.
                 Hence, data points that are close to each other
                 according to the distance measure can be stored locally
                 in an index file. During the ANN search, only a limited
                 number of disk pages among few index files are
                 necessary to be accessed for sufficient candidate
                 generation and verification, which not only
                 significantly reduces the response time but also
                 improves the accuracy of the returned results. Our
                 exhaustive empirical study over several real-world data
                 sets demonstrates the superior efficiency and accuracy
                 of SK-LSH for the ANN search, compared with
                 state-of-the-art methods, including LSB, C2LSH and
                 CK-Means.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lin:2014:AFP,
  author =       "Bing-Rong Lin and Daniel Kifer",
  title =        "On arbitrage-free pricing for general data queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "757--768",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data is a commodity. Recent research has considered
                 the mathematical problem of setting prices for
                 different queries over data. Ideal pricing functions
                  need to be flexible --- defined for arbitrary queries
                 (select-project-join, aggregate, random sample, and
                 noisy privacy-preserving queries). They should be
                 fine-grained --- a consumer should not be required to
                 buy the entire database to get answers to simple
                 ``low-information'' queries (such as selecting only a
                 few tuples or aggregating over only one attribute).
                 Similarly, a consumer may not want to pay a large
                 amount of money, only to discover that the database is
                 empty. Finally, pricing functions should satisfy
                 consistency conditions such as being ``arbitrage-free''
                 --- consumers should not be able to circumvent the
                 pricing function by deducing the answer to an expensive
                 query from a few cheap queries. Previously proposed
                 pricing functions satisfy some of these criteria (i.e.
                 they are defined for restricted subclasses of queries
                 and/or use relaxed conditions for avoiding arbitrage).
                 In this paper, we study arbitrage-free pricing
                 functions defined for arbitrary queries. We propose new
                 necessary conditions for avoiding arbitrage and provide
                 new arbitrage-free pricing functions. We also prove
                 several negative results related to the tension between
                 flexible pricing and avoiding arbitrage, and show how
                 this tension often results in unreasonable prices.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:SMF,
  author =       "Chao Zhang and Jiawei Han and Lidan Shou and Jiajun Lu
                 and Thomas {La Porta}",
  title =        "{Splitter}: mining fine-grained sequential patterns in
                 semantic trajectories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "769--780",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Driven by the advance of positioning technology and
                 the popularity of location-sharing services,
                 semantic-enriched trajectory data have become
                 unprecedentedly available. The sequential patterns
                 hidden in such data, when properly defined and
                 extracted, can greatly benefit tasks like targeted
                 advertising and urban planning. Unfortunately, classic
                 sequential pattern mining algorithms developed for
                 transactional data cannot effectively mine patterns in
                 semantic trajectories, mainly because the places in the
                 continuous space cannot be regarded as independent
                 ``items''. Instead, similar places need to be grouped
                 to collaboratively form frequent sequential patterns.
                 That said, it remains a challenging task to mine what
                 we call fine-grained sequential patterns, which must
                 satisfy spatial compactness, semantic consistency and
                 temporal continuity simultaneously. We propose Splitter
                 to effectively mine such fine-grained sequential
                 patterns in two steps. In the first step, it retrieves
                 a set of spatially coarse patterns, each attached with
                 a set of trajectory snippets that precisely record the
                 pattern's occurrences in the database. In the second
                 step, Splitter breaks each coarse pattern into
                 fine-grained ones in a top-down manner, by
                 progressively detecting dense and compact clusters in a
                 higher-dimensional space spanned by the snippets.
                 Splitter uses an effective algorithm called weighted
                 snippet shift to detect such clusters, and leverages a
                 divide-and-conquer strategy to speed up the top-down
                 pattern splitting process. Our experiments on both real
                 and synthetic data sets demonstrate the effectiveness
                 and efficiency of Splitter.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Floratou:2014:TBW,
  author =       "Avrilia Floratou and Frank Bertsch and Jignesh M.
                 Patel and Georgios Laskaris",
  title =        "Towards building wind tunnels for data center design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "9",
  pages =        "781--784",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:18 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data center design is a tedious and expensive process.
                 Recently, this process has become even more challenging
                 as users of cloud services expect to have guaranteed
                 levels of availability, durability and performance. A
                 new challenge for the service providers is to find the
                 most cost-effective data center design and
                 configuration that will accommodate the users'
                 expectations, on ever-changing workloads, and
                 constantly evolving hardware and software components.
                 In this paper, we argue that data center design should
                 become a systematic process. First, it should be done
                 using an integrated approach that takes into account
                 both the hardware and the software interdependencies,
                 and their impact on users' expectations. Second, it
                 should be performed in a ``wind tunnel'', which uses
                 large-scale simulation to systematically explore the
                 impact of a data center configuration on both the
                 users' and the service providers' requirements. We
                 believe that this is the first step towards systematic
                 data center design --- an exciting area for future
                 research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:RRQ,
  author =       "Zhao Zhang and Cheqing Jin and Qiangqiang Kang",
  title =        "Reverse $k$-ranks query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "785--796",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding matching customers for a given product based
                 on individual user's preference is critical for many
                 applications, especially in e-commerce. Recently, the
                 reverse top-$k$ query is proposed to return a number of
                 customers who regard a given product as one of the $k$
                 most favorite products based on a linear model.
                 Although a few ``hot'' products can be returned to some
                 customers via reverse top-$k$ query, a large proportion
                 of products (over 90\%, as our example illustrates, see
                 Figure 2) cannot find any matching customers. Inspired
                 by this observation, we propose a new kind of query
                 ($R$-$k$ Ranks) which finds for a given product, the
                 top-$k$ customers whose rank for the product is highest
                 among all customers, to ensure 100\% coverage for any
                 given product, no matter it is hot or niche. Not
                 limited to e-commerce, the concept of customer ---
                 product can be extended to a wider range of
                 applications, such as dating and job-hunting.
                 Unfortunately, existing approaches for reverse top-$k$
                 query cannot be used to handle $R$-$k$ Ranks
                 conveniently due to infeasibility of getting enough
                 elements for the query result. Hence, we propose three
                 novel approaches to efficiently process $R$-$k$ Ranks
                 query, including one tree-based method and two
                 batch-pruning-based methods. Analysis of theoretical
                 and experimental results on real and synthetic data
                 sets illustrates the efficacy of the proposed
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jugel:2014:MVO,
  author =       "Uwe Jugel and Zbigniew Jerzak and Gregor Hackenbroich
                  and Volker Markl",
  title =        "{M4}: a visualization-oriented time series data
                 aggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "797--808",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Visual analysis of high-volume time series data is
                 ubiquitous in many industries, including finance,
                 banking, and discrete manufacturing. Contemporary,
                 RDBMS-based systems for visualization of high-volume
                 time series data have difficulty to cope with the hard
                 latency requirements and high ingestion rates of
                 interactive visualizations. Existing solutions for
                 lowering the volume of time series data disregard the
                 semantics of visualizations and result in visualization
                 errors. In this work, we introduce M4, an
                 aggregation-based time series dimensionality reduction
                 technique that provides error-free visualizations at
                 high data reduction rates. Focusing on line charts, as
                 the predominant form of time series visualization, we
                 explain in detail the drawbacks of existing data
                 reduction techniques and how our approach outperforms
                 state of the art, by respecting the process of line
                 rasterization. We describe how to incorporate
                 aggregation-based dimensionality reduction at the query
                 level in a visualization-driven query rewriting system.
                 Our approach is generic and applicable to any
                 visualization system that uses an RDBMS as data source.
                 Using real world data sets from high tech
                 manufacturing, stock markets, and sports analytics
                 domains we demonstrate that our visualization-oriented
                 data aggregation can reduce data volumes by up to two
                 orders of magnitude, while preserving perfect
                 visualizations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ghashami:2014:CMA,
  author =       "Mina Ghashami and Jeff M. Phillips and Feifei Li",
  title =        "Continuous matrix approximation on distributed data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "809--820",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tracking and approximating data matrices in streaming
                 fashion is a fundamental challenge. The problem
                 requires more care and attention when data comes from
                 multiple distributed sites, each receiving a stream of
                 data. This paper considers the problem of ``tracking
                 approximations to a matrix'' in the distributed
                 streaming model. In this model, there are $m$
                 distributed sites each observing a distinct stream of
                 data (where each element is a row of a distributed
                 matrix) and has a communication channel with a
                  coordinator, and the goal is to track an
                  $\epsilon$-approximation to the norm of the matrix
                  along any
                 direction. To that end, we present novel algorithms to
                 address the matrix approximation problem. Our
                 algorithms maintain a smaller matrix $B$, as an
                 approximation to a distributed streaming matrix $A$,
                 such that for any unit vector $x$: $ | \, || A x ||^2 -
                 || B x ||^2 | \leq \epsilon || A ||^2_F$. Our
                 algorithms work in streaming fashion and incur small
                 communication, which is critical for distributed
                 computation. Our best method is deterministic and uses
                 only $ O((m / \epsilon) \log (\beta N))$ communication,
                 where $N$ is the size of stream (at the time of the
                  query) and $ \beta $ is an upper bound on the squared
                 norm of any row of the matrix. In addition to proving
                 all algorithmic properties theoretically, extensive
                 experiments with real large datasets demonstrate the
                 efficiency of these protocols.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2014:EAD,
  author =       "Kun Ren and Alexander Thomson and Daniel J. Abadi",
  title =        "An evaluation of the advantages and disadvantages of
                 deterministic database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "821--832",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent proposals for deterministic database system
                 designs argue that deterministic database systems
                 facilitate replication since the same input can be
                 independently sent to two different replicas without
                 concern for replica divergence. In addition, they argue
                 that determinism yields performance benefits due to (1)
                 the introduction of deadlock avoidance techniques, (2)
                 the reduction (or elimination) of distributed commit
                 protocols, and (3) light-weight locking. However, these
                 performance benefits are not universally applicable,
                 and there exist several disadvantages of determinism,
                 including (1) the additional overhead of processing
                 transactions for which it is not known in advance what
                 data will be accessed, (2) an inability to abort
                 transactions arbitrarily (e.g., in the case of database
                 or partition overload), and (3) the increased latency
                 required by a preprocessing layer that ensures that the
                 same input is sent to every replica. This paper
                 presents a thorough experimental study that carefully
                 investigates both the advantages and disadvantages of
                 determinism, in order to give a database user a more
                 complete understanding of which database to use for a
                 given database workload and cluster configuration.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:EMD,
  author =       "Hao Zhang and Bogdan Marius Tudor and Gang Chen and
                 Beng Chin Ooi",
  title =        "Efficient in-memory data management: an analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "833--836",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper analyzes the performance of three systems
                 for in-memory data management: Memcached, Redis and the
                 Resilient Distributed Datasets (RDD) implemented by
                 Spark. By performing a thorough performance analysis of
                 both analytics operations and fine-grained object
                 operations such as set/get, we show that neither system
                 handles efficiently both types of workloads. For
                 Memcached and Redis the CPU and I/O performance of the
                 TCP stack are the bottlenecks --- even when serving
                 in-memory objects within a single server node. RDD does
                 not support efficient get operation for random objects,
                 due to a large startup cost of the get job. Our
                 analysis reveals a set of features that a system must
                 support in order to achieve efficient in-memory data
                 management.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aluc:2014:WMW,
  author =       "G{\"u}ne{\c{s}} Alu{\c{c}} and M. Tamer {\"O}zsu and
                 Khuzaima Daudjee",
  title =        "Workload matters: why {RDF} databases need a new
                 design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "837--840",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Resource Description Framework (RDF) is a standard
                 for conceptually describing data on the Web, and SPARQL
                 is the query language for RDF. As RDF is becoming
                 widely utilized, RDF data management systems are being
                 exposed to more diverse and dynamic workloads. Existing
                 systems are workload-oblivious, and are therefore
                 unable to provide consistently good performance. We
                 propose a vision for a workload-aware and adaptive
                 system. To realize this vision, we re-evaluate relevant
                 existing physical design criteria for RDF and address
                 the resulting set of new challenges.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alsubaiee:2014:SMA,
  author =       "Sattam Alsubaiee and Alexander Behm and Vinayak Borkar
                 and Zachary Heilbron and Young-Seok Kim and Michael J.
                 Carey and Markus Dreseler and Chen Li",
  title =        "Storage management in {AsterixDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "841--852",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Social networks, online communities, mobile devices,
                 and instant messaging applications generate complex,
                 unstructured data at a high rate, resulting in large
                 volumes of data. This poses new challenges for data
                 management systems that aim to ingest, store, index,
                 and analyze such data efficiently. In response, we
                 released the first public version of AsterixDB, an
                 open-source Big Data Management System (BDMS), in June
                 of 2013. This paper describes the storage management
                 layer of AsterixDB, providing a detailed description of
                 its ingestion-oriented approach to local storage and a
                 set of initial measurements of its ingestion-related
                 performance characteristics. In order to support high
                 frequency insertions, AsterixDB has wholly adopted
                 Log-Structured Merge-trees as the storage technology
                 for all of its index structures. We describe how the
                 AsterixDB software framework enables ``LSM-ification''
                 (conversion from an in-place update, disk-based data
                 structure to a deferred-update, append-only data
                 structure) of any kind of index structure that supports
                 certain primitive operations, enabling the index to
                 ingest data efficiently. We also describe how AsterixDB
                 ensures the ACID properties for operations involving
                 multiple heterogeneous LSM-based indexes. Lastly, we
                 highlight the challenges related to managing the
                 resources of a system when many LSM indexes are used
                 concurrently and present AsterixDB's initial
                 solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Klonatos:2014:BEQ,
  author =       "Yannis Klonatos and Christoph Koch and Tiark Rompf and
                 Hassan Chafi",
  title =        "Building efficient query engines in a high-level
                 language",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "853--864",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See errata \cite{Klonatos:2014:EBE}.",
  abstract =     "In this paper we advocate that it is time for a
                 radical rethinking of database systems design.
                 Developers should be able to leverage high-level
                 programming languages without having to pay a price in
                 efficiency. To realize our vision of abstraction
                 without regret, we present LegoBase, a query engine
                 written in the high-level programming language Scala.
                 The key technique to regain efficiency is to apply
                 generative programming: the Scala code that constitutes
                 the query engine, despite its high-level appearance, is
                 actually a program generator that emits specialized,
                 low-level C code. We show how the combination of
                 high-level and generative programming allows to easily
                 implement a wide spectrum of optimizations that are
                 difficult to achieve with existing low-level query
                 compilers, and how it can continuously optimize the
                 query engine. We evaluate our approach with the TPC-H
                 benchmark and show that: (a) with all optimizations
                 enabled, our architecture significantly outperforms a
                 commercial in-memory database system as well as an
                 existing query compiler, (b) these performance
                 improvements require programming just a few hundred
                 lines of high-level code instead of complicated
                 low-level code that is required by existing query
                 compilers and, finally, that (c) the compilation
                 overhead is low compared to the overall execution time,
                 thus making our approach usable in practice for
                 efficiently compiling query engines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2014:SLT,
  author =       "Tianzheng Wang and Ryan Johnson",
  title =        "Scalable logging through emerging non-volatile
                 memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "865--876",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Emerging byte-addressable, non-volatile memory (NVM)
                 is fundamentally changing the design principle of
                 transaction logging. It potentially invalidates the
                  need for flush-before-commit as log records are
                 persistent immediately upon write. Distributed
                 logging---a once prohibitive technique for single node
                 systems in the DRAM era---becomes a promising solution
                 to easing the logging bottleneck because of the
                 non-volatility and high performance of NVM. In this
                 paper, we advocate NVM and distributed logging on
                 multicore and multi-socket hardware. We identify the
                 challenges brought by distributed logging and discuss
                 solutions. To protect committed work in NVM-based
                 systems, we propose passive group commit, a
                 lightweight, practical approach that leverages existing
                 hardware and group commit. We expect that durable
                 processor cache is the ultimate solution to protecting
                 committed work and building reliable, scalable
                 NVM-based systems in general. We evaluate distributed
                 logging with logging-intensive workloads and show that
                 distributed logging can achieve as much as $ \approx 3
                 \times $ speedup over centralized logging in a modern
                 DBMS and that passive group commit only induces
                 minuscule overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2014:WDM,
  author =       "Bingsheng He",
  title =        "When data management systems meet approximate
                 hardware: challenges and opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "877--880",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, approximate hardware designs have got many
                 research interests in the computer architecture
                 community. The essential idea of approximate hardware
                 is that the hardware components such as CPU, memory and
                 storage can trade off the accuracy of results for
                 increased performance, reduced energy consumption, or
                 both. We propose a DBMS ApproxiDB with its design,
                 implementation and optimization aware of the underlying
                 approximate hardware. ApproxiDB will run on a hybrid
                 machine consisting of both approximate hardware and
                 precise hardware (i.e., the conventional hardware
                 without sacrificing the accuracy). With approximate
                 hardware, ApproxiDB can efficiently support the concept
                 of approximate query processing, without the overhead
                 of pre-computed synopses or sampling techniques. More
                 importantly, ApproxiDB is also beneficial to precise
                 query processing, by developing non-trivial hybrid
                 execution mechanisms on both precise and approximate
                 hardware. In this vision paper, we sketch the initial
                 design of ApproxiDB, discuss the technical challenges
                 in building this system and outline an agenda for
                 future research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2014:DFK,
  author =       "Xin Luna Dong and Evgeniy Gabrilovich and Geremy Heitz
                 and Wilko Horn and Kevin Murphy and Shaohua Sun and Wei
                 Zhang",
  title =        "From data fusion to knowledge fusion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "881--892",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The task of data fusion is to identify the true values
                 of data items (e.g., the true date of birth for Tom
                 Cruise) among multiple observed values drawn from
                 different sources (e.g., Web sites) of varying (and
                 unknown) reliability. A recent survey [20] has provided
                 a detailed comparison of various fusion methods on Deep
                 Web data. In this paper, we study the applicability and
                 limitations of different fusion techniques on a more
                 challenging problem: knowledge fusion. Knowledge fusion
                 identifies true subject-predicate-object triples
                 extracted by multiple information extractors from
                 multiple information sources. These extractors perform
                 the tasks of entity linkage and schema alignment, thus
                 introducing an additional source of noise that is quite
                 different from that traditionally considered in the
                 data fusion literature, which only focuses on factual
                 errors in the original sources. We adapt
                 state-of-the-art data fusion techniques and apply them
                 to a knowledge base with 1.6B unique knowledge triples
                 extracted by 12 extractors from over 1B Web pages,
                 which is three orders of magnitude larger than the data
                 sets used in previous data fusion papers. We show great
                 promise of the data fusion approaches in solving the
                 knowledge fusion problem, and suggest interesting
                 research directions through a detailed error analysis
                 of the methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Funke:2014:KPC,
  author =       "Stefan Funke and Andr{\'e} Nusser and Sabine
                 Storandt",
  title =        "On $k$-path covers and their applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "893--902",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "For a directed graph $G$ with vertex set $V$ we call
                  a subset $C \subseteq V$ a $k$-(All-)Path Cover if $C$
                  contains a node from any path consisting of $k$
                  nodes. This
                 paper considers the problem of constructing small
                 $k$-Path Covers in the context of road networks with
                 millions of nodes and edges. In many application
                  scenarios the set $C$ and its induced overlay graph
                  constitute a very compact synopsis of $G$ which is the
                 basis for the currently fastest data structure for
                 personalized shortest path queries, visually pleasing
                 overlays of subsampled paths, and efficient reporting,
                 retrieval and aggregation of associated data in spatial
                 network databases. Apart from a theoretical
                 investigation of the problem, we provide efficient
                 algorithms that produce very small $k$-Path Covers for
                 large real-world road networks (with a posteriori
                 guarantees via instance-based lower bounds).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2014:CDV,
  author =       "Eugene Wu and Leilani Battle and Samuel R. Madden",
  title =        "The case for data visualization management systems:
                 vision paper",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "903--906",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Most visualizations today are produced by retrieving
                 data from a database and using a specialized
                 visualization tool to render it. This decoupled
                 approach results in significant duplication of
                 functionality, such as aggregation and filters, and
                 misses tremendous opportunities for cross-layer
                 optimizations. In this paper, we present the case for
                 an integrated Data Visualization Management System
                 (DVMS) based on a declarative visualization language
                 that fully compiles the end-to-end visualization
                 pipeline into a set of relational algebra queries. Thus
                 the DVMS can be both expressive via the visualization
                  language, and performant by leveraging traditional and
                 visualization-specific optimizations to scale
                 interactive visualizations to massive datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:WAA,
  author =       "Yinan Li and Jignesh M. Patel",
  title =        "{WideTable}: an accelerator for analytical data
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "907--918",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents a technique called WideTable that
                 aims to improve the speed of analytical data processing
                 systems. A WideTable is built by denormalizing the
                 database, and then converting complex queries into
                 simple scans on the underlying (wide) table. To avoid
                 the pitfalls associated with denormalization, e.g.
                 space overheads, WideTable uses a combination of
                 techniques including dictionary encoding and columnar
                 storage. When denormalizing the data, WideTable uses
                 outer joins to ensure that queries on tables in the
                 schema graph, which are now nested as embedded tables
                 in the WideTable, are processed correctly. Then, using
                 a packed code scan technique, even complex queries on
                 the original database can be answered by using simple
                 scans on the WideTable(s). We experimentally evaluate
                 our methods in a main memory setting using the queries
                 in TPC-H, and demonstrate the effectiveness of our
                 methods, both in terms of raw query performance and
                 scalability when running on many-core machines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{To:2014:FPW,
  author =       "Hien To and Gabriel Ghinita and Cyrus Shahabi",
  title =        "A framework for protecting worker location privacy in
                 spatial crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "10",
  pages =        "919--930",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:21 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Spatial Crowdsourcing (SC) is a transformative
                 platform that engages individuals, groups and
                 communities in the act of collecting, analyzing, and
                 disseminating environmental, social and other
                 spatio-temporal information. The objective of SC is to
                 outsource a set of spatio-temporal tasks to a set of
                 workers, i.e., individuals with mobile devices that
                 perform the tasks by physically traveling to specified
                 locations of interest. However, current solutions
                 require the workers, who in many cases are simply
                 volunteering for a cause, to disclose their locations
                 to untrustworthy entities. In this paper, we introduce
                 a framework for protecting location privacy of workers
                 participating in SC tasks. We argue that existing
                 location privacy techniques are not sufficient for SC,
                 and we propose a mechanism based on differential
                 privacy and geocasting that achieves effective SC
                 services while offering privacy guarantees to workers.
                 We investigate analytical models and task assignment
                 strategies that balance multiple crucial aspects of SC
                 functionality, such as task completion rate, worker
                 travel distance and system overhead. Extensive
                 experimental results on real-world datasets show that
                 the proposed technique protects workers' location
                 privacy without incurring significant performance
                 metrics penalties.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eldawy:2014:TTS,
  author =       "Ahmed Eldawy and Justin Levandoski and Per-{\AA}ke
                 Larson",
  title =        "Trekking through {Siberia}: managing cold data in a
                 memory-optimized database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "931--942",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Main memories are becoming sufficiently large that
                 most OLTP databases can be stored entirely in main
                 memory, but this may not be the best solution. OLTP
                 workloads typically exhibit skewed access patterns
                 where some records are hot (frequently accessed) but
                 many records are cold (infrequently or never accessed).
                 It is still more economical to store the coldest
                 records on secondary storage such as flash. This paper
                 introduces Siberia, a framework for managing cold data
                 in the Microsoft Hekaton main-memory database engine.
                 We discuss how to migrate cold data to secondary
                 storage while providing an interface to the user to
                 manipulate both hot and cold data that hides the actual
                 data location. We describe how queries of different
                 isolation levels can read and modify data stored in
                 both hot and cold stores without restriction while
                 minimizing number of accesses to cold storage. We also
                 show how records can be migrated between hot and cold
                 stores while the DBMS is online and active. Experiments
                 reveal that for cold data access rates appropriate for
                 main-memory optimized databases, we incur an acceptable
                  7--14\% throughput loss.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Duggan:2014:CPD,
  author =       "Jennie Duggan",
  title =        "The case for personal data-driven decision making",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "943--946",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data-driven decision making (D3M) has shown great
                 promise in professional pursuits such as business and
                 government. Here, policymakers collect and analyze data
                 to make their operations more efficient and equitable.
                 Progress in bringing the benefits of D3M to everyday
                 life has been slow. For example, a student asks, ``If I
                 pursue an undergraduate degree at this university, what
                 are my expected lifetime earnings?''. Presently there
                 is no principled way to search for this, because an
                 accurate answer depends on the student and school. Such
                 queries are personalized, winnowing down large datasets
                 for specific circumstances, rather than applying
                 well-defined predicates. They predict decision outcomes
                 by extrapolating from relevant examples. This vision
                 paper introduces a new approach to D3M that is designed
                 to empower the individual to make informed choices.
                 Here, we highlight research opportunities for the data
                 management community arising from this proposal.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chairunnanda:2014:CMM,
  author =       "Prima Chairunnanda and Khuzaima Daudjee and M. Tamer
                 {\"O}zsu",
  title =        "{ConfluxDB}: multi-master replication for partitioned
                 snapshot isolation databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "947--958",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Lazy replication with snapshot isolation (SI) has
                 emerged as a popular choice for distributed databases.
                 However, lazy replication often requires execution of
                 update transactions at one (master) site so that it is
                 relatively easy for a total SI order to be determined
                 for consistent installation of updates in the lazily
                 replicated system. We propose a set of techniques that
                 support update transaction execution over multiple
                 partitioned sites, thereby allowing the master to
                 scale. Our techniques determine a total SI order for
                 update transactions over multiple master sites without
                 requiring global coordination in the distributed
                 system, and ensure that updates are installed in this
                 order at all sites to provide consistent and scalable
                 replication with SI. We present ConfluxDB, a
                 PostgreSQL-based implementation of our techniques, and
                 demonstrate its effectiveness through experimental
                 evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Goncalves:2014:DMS,
  author =       "Bernardo Gon{\c{c}}alves and Fabio Porto",
  title =        "{$ \gamma $-DB}: managing scientific hypotheses as
                 uncertain data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "959--962",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In view of the paradigm shift that makes science ever
                 more data-driven, we consider deterministic scientific
                 hypotheses as uncertain data. This vision comprises a
                 probabilistic database (p-DB) design methodology for
                 the systematic construction and management of
                 U-relational hypothesis DBs, viz., $ \gamma $-DBs. It
                 introduces hypothesis management as a promising new
                 class of applications for p-DBs. We illustrate the
                 potential of $ \gamma $-DB as a tool for deep
                 predictive analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Woods:2014:IIS,
  author =       "Louis Woods and Zsolt Istv{\'a}n and Gustavo Alonso",
  title =        "{Ibex}: an intelligent storage engine with support for
                 advanced {SQL} offloading",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "963--974",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern data appliances face severe bandwidth
                 bottlenecks when moving vast amounts of data from
                 storage to the query processing nodes. A possible
                 solution to mitigate these bottlenecks is query
                 off-loading to an intelligent storage engine, where
                 partial or whole queries are pushed down to the storage
                 engine. In this paper, we present Ibex, a prototype of
                 an intelligent storage engine that supports off-loading
                 of complex query operators. Besides increasing
                 performance, Ibex also reduces energy consumption, as
                 it uses an FPGA rather than conventional CPUs to
                 implement the off-load engine. Ibex is a hybrid engine,
                 with dedicated hardware that evaluates SQL expressions
                 at line-rate and a software fallback for tasks that the
                 hardware engine cannot handle. Ibex supports GROUP BY
                 aggregation, as well as projection --- and selection
                 --- based filtering. GROUP BY aggregation has a higher
                 impact on performance but is also a more challenging
                 operator to implement on an FPGA.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yun:2014:NNL,
  author =       "Hyokun Yun and Hsiang-Fu Yu and Cho-Jui Hsieh and S.
                 V. N. Vishwanathan and Inderjit Dhillon",
  title =        "{NOMAD}: non-locking, stochastic multi-machine
                 algorithm for asynchronous and decentralized matrix
                 completion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "975--986",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We develop an efficient parallel distributed algorithm
                 for matrix completion, named NOMAD (Non-locking,
                 stOchastic Multi-machine algorithm for Asynchronous and
                 Decentralized matrix completion). NOMAD is a
                 decentralized algorithm with non-blocking communication
                 between processors. One of the key features of NOMAD is
                 that the ownership of a variable is asynchronously
                 transferred between processors in a decentralized
                 fashion. As a consequence it is a lock-free parallel
                 algorithm. In spite of being asynchronous, the variable
                 updates of NOMAD are serializable, that is, there is an
                 equivalent update ordering in a serial implementation.
                 NOMAD outperforms synchronous algorithms which require
                 explicit bulk synchronization after every iteration:
                 our extensive empirical evaluation shows that not only
                 does our algorithm perform well in distributed setting
                 on commodity hardware, but also outperforms
                 state-of-the-art algorithms on a HPC cluster both in
                 multi-core and distributed memory settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Song:2014:RVL,
  author =       "Shaoxu Song and Hong Cheng and Jeffrey Xu Yu and Lei
                 Chen",
  title =        "Repairing vertex labels under neighborhood
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "987--998",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A broad class of data, ranging from similarity
                 networks, workflow networks to protein networks, can be
                 modeled as graphs with data values as vertex labels.
                 The vertex labels (data values) are often dirty for
                 various reasons such as typos or erroneous reporting of
                 results in scientific experiments. Neighborhood
                 constraints, specifying label pairs that are allowed to
                 appear on adjacent vertexes in the graph, are employed
                 to detect and repair erroneous vertex labels. In this
                 paper, we study the problem of repairing vertex labels
                 to make graphs satisfy neighborhood constraints.
                 Unfortunately, the relabeling problem is proved to be
                 NP hard, which motivates us to devise approximation
                 methods for repairing, and identify interesting special
                 cases (star and clique constraints) that can be
                 efficiently solved. We propose several approximate
                 repairing algorithms including greedy heuristics,
                 contraction method and a hybrid approach. The
                 performances of algorithms are also analyzed for the
                 special case. Our extensive experimental evaluation, on
                 both synthetic and real data, demonstrates the
                 effectiveness of eliminating frauds in several types of
                 application networks. Remarkably, the hybrid method
                 performs well in practice, i.e., guarantees
                 termination, while achieving high effectiveness at the
                 same time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Altowim:2014:PAR,
  author =       "Yasser Altowim and Dmitri V. Kalashnikov and Sharad
                 Mehrotra",
  title =        "Progressive approach to relational entity resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "999--1010",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper proposes a progressive approach to entity
                 resolution (ER) that allows users to explore a
                 trade-off between the resolution cost and the achieved
                 quality of the resolved data. In particular, our
                 approach aims to produce the highest quality result
                 given a constraint on the resolution budget, specified
                 by the user. Our proposed method monitors and
                 dynamically reassesses the resolution progress to
                 determine which parts of the data should be resolved
                 next and how they should be resolved. The comprehensive
                 empirical evaluation of the proposed approach
                 demonstrates its significant advantage in terms of
                 efficiency over the traditional ER techniques for the
                 given problem settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2014:CAQ,
  author =       "Kaibo Wang and Kai Zhang and Yuan Yuan and Siyuan Ma
                 and Rubao Lee and Xiaoning Ding and Xiaodong Zhang",
  title =        "Concurrent analytical query processing with {GPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "11",
  pages =        "1011--1022",
  month =        jul,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In current databases, GPUs are used as dedicated
                 accelerators to process each individual query. Sharing
                 GPUs among concurrent queries is not supported, causing
                 serious resource underutilization. Based on the
                 profiling of an open-source GPU query engine running
                 commonly used single-query data warehousing workloads,
                 we observe that the utilization of main GPU resources
                 is only up to 25\%. The underutilization leads to low
                 system throughput. To address the problem, this paper
                 proposes concurrent query execution as an effective
                 solution. To efficiently share GPUs among concurrent
                 queries for high throughput, the major challenge is to
                 provide software support to control and resolve
                 resource contention incurred by the sharing. Our
                 solution relies on GPU query scheduling and device
                 memory swapping policies to address this challenge. We
                 have implemented a prototype system and evaluated it
                 intensively. The experiment results confirm the
                 effectiveness and performance advantage of our
                 approach. By executing multiple GPU queries
                 concurrently, system throughput can be improved by up
                 to 55\% compared with dedicated processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Maehara:2014:CPP,
  author =       "Takanori Maehara and Takuya Akiba and Yoichi Iwata and
                 Ken-ichi Kawarabayashi",
  title =        "Computing personalized {PageRank} quickly by
                 exploiting graph structures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1023--1034",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose a new scalable algorithm that can compute
                 Personalized PageRank (PPR) very quickly. The Power
                 method is a state-of-the-art algorithm for computing
                 exact PPR; however, it requires many iterations. Thus
                 reducing the number of iterations is the main
                 challenge. We achieve this by exploiting graph
                 structures of web graphs and social networks. The
                 convergence of our algorithm is very fast. In fact, it
                 requires up to 7.5 times fewer iterations than the
                 Power method and is up to five times faster in actual
                 computation time. To the best of our knowledge, this is
                 the first time to use graph structures explicitly to
                 solve PPR quickly. Our contributions can be summarized
                 as follows. 1. We provide an algorithm for computing a
                 tree decomposition, which is more efficient and
                 scalable than any previous algorithm. 2. Using the
                 above algorithm, we can obtain a core-tree
                 decomposition of any web graph and social network. This
                 allows us to decompose a web graph and a social network
                 into (1) the core, which behaves like an expander
                 graph, and (2) a small tree-width graph, which behaves
                 like a tree in an algorithmic sense. 3. We apply a
                 direct method to the small tree-width graph to
                 construct an LU decomposition. 4. Building on the LU
                 decomposition and using it as pre-conditioner, we apply
                 GMRES method (a state-of-the-art advanced iterative
                 method) to compute PPR for whole web graphs and social
                 networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Serafini:2014:AES,
  author =       "Marco Serafini and Essam Mansour and Ashraf Aboulnaga
                 and Kenneth Salem and Taha Rafiq and Umar Farooq
                 Minhas",
  title =        "{Accordion}: elastic scalability for database systems
                 supporting distributed transactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1035--1046",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Providing the ability to elastically use more or fewer
                 servers on demand (scale out and scale in) as the load
                 varies is essential for database management systems
                 (DBMSes) deployed on today's distributed computing
                 platforms, such as the cloud. This requires solving the
                 problem of dynamic (online) data placement, which has
                 so far been addressed only for workloads where all
                 transactions are local to one sever. In DBMSes where
                 ACID transactions can access more than one partition,
                 distributed transactions represent a major performance
                 bottleneck. Scaling out and spreading data across a
                 larger number of servers does not necessarily result in
                 a linear increase in the overall system throughput,
                 because transactions that used to access only one
                 server may become distributed. In this paper we present
                 Accordion, a dynamic data placement system for
                 partition-based DBMSes that support ACID transactions
                 (local or distributed). It does so by explicitly
                 considering the affinity between partitions, which
                 indicates the frequency in which they are accessed
                 together by the same transactions. Accordion estimates
                 the capacity of a server by explicitly considering the
                 impact of distributed transactions and affinity on the
                 maximum throughput of the server. It then integrates
                 this estimation in a mixed-integer linear program to
                 explore the space of possible configurations and decide
                 whether to scale out. We implemented Accordion and
                 evaluated it using H-Store, a shared-nothing in-memory
                 DBMS. Our results using the TPC-C and YCSB benchmarks
                 show that Accordion achieves benefits compared to
                 alternative heuristics of up to an order of magnitude
                 reduction in the number of servers used and in the
                 amount of data migrated.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2014:ECP,
  author =       "Minyang Han and Khuzaima Daudjee and Khaled Ammar and
                 M. Tamer {\"O}zsu and Xingfang Wang and Tianqi Jin",
  title =        "An experimental comparison of {Pregel}-like graph
                 processing systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1047--1058",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The introduction of Google's Pregel generated much
                 interest in the field of large-scale graph data
                 processing, inspiring the development of Pregel-like
                 systems such as Apache Giraph, GPS, Mizan, and
                 GraphLab, all of which have appeared in the past two
                 years. To gain an understanding of how Pregel-like
                 systems perform, we conduct a study to experimentally
                 compare Giraph, GPS, Mizan, and GraphLab on equal
                 ground by considering graph and algorithm agnostic
                 optimizations and by using several metrics. The systems
                 are compared with four different algorithms (PageRank,
                 single source shortest path, weakly connected
                 components, and distributed minimum spanning tree) on
                 up to 128 Amazon EC2 machines. We find that the system
                 optimizations present in Giraph and GraphLab allow them
                 to perform well. Our evaluation also shows Giraph
                 1.0.0's considerable improvement since Giraph 0.1 and
                 identifies areas of improvement for all systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sarma:2014:CSJ,
  author =       "Akash {Das Sarma} and Yeye He and Surajit Chaudhuri",
  title =        "{ClusterJoin}: a similarity joins framework using
                 map-reduce",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1059--1070",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Similarity join is the problem of finding pairs of
                 records with similarity score greater than some
                 threshold. In this paper we study the problem of
                 scaling up similarity join for different metric
                 distance functions using MapReduce. We propose a
                 ClusterJoin framework that partitions the data space
                 based on the underlying data distribution, and
                 distributes each record to partitions in which they may
                 produce join results based on the distance threshold.
                 We design a set of strong candidate filters specific to
                 different distance functions using a novel
                 bisector-based framework, so that each record only
                 needs to be distributed to a small number of partitions
                 while still guaranteeing correctness. To address data
                 skewness, which is common for high dimensional data, we
                 further develop a dynamic load balancing scheme using
                 sampling, which provides strong probabilistic
                 guarantees on the size of partitions, and greatly
                 improves scalability. Experimental evaluation using
                 real data sets shows that our approach is considerably
                 more scalable compared to state-of-the-art algorithms,
                 especially for high dimensional data with low distance
                 thresholds.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vesdapunt:2014:CAE,
  author =       "Norases Vesdapunt and Kedar Bellare and Nilesh Dalvi",
  title =        "Crowdsourcing algorithms for entity resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1071--1082",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we study a hybrid human-machine
                 approach for solving the problem of Entity Resolution
                 (ER). The goal of ER is to identify all records in a
                 database that refer to the same underlying entity, and
                 are therefore duplicates of each other. Our input is a
                 graph over all the records in a database, where each
                 edge has a probability denoting our prior belief (based
                 on Machine Learning models) that the pair of records
                 represented by the given edge are duplicates. Our
                 objective is to resolve all the duplicates by asking
                 humans to verify the equality of a subset of edges,
                 leveraging the transitivity of the equality relation to
                 infer the remaining edges (e.g. $ a = c $ can be
                 inferred given $ a = b $ and $ b = c$). We consider the
                 problem of designing optimal strategies for asking
                 questions to humans that minimize the expected number
                 of questions asked. Using our theoretical framework, we
                 analyze several strategies, and show that a strategy,
                 claimed as ``optimal'' for this problem in a recent
                 work, can perform arbitrarily bad in theory. We propose
                 alternate strategies with theoretical guarantees. Using
                 both public datasets as well as the production system
                 at Facebook, we show that our techniques are effective
                 in practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2014:DGS,
  author =       "Wenfei Fan and Xin Wang and Yinghui Wu and Dong Deng",
  title =        "Distributed graph simulation: impossibility and
                 possibility",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1083--1094",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper studies fundamental problems for
                 distributed graph simulation. Given a pattern query Q
                 and a graph G that is fragmented and distributed, a
                 graph simulation algorithm A is to compute the matches
                 Q (G) of Q in G. We say that A is parallel scalable in
                 (a) response time if its parallel computational cost is
                 determined by the largest fragment F$_m$ of G and the
                 size | Q | of query Q, and (b) data shipment if its
                 total amount of data shipped is determined by | Q | and
                 the number of fragments of G, independent of the size
                 of graph G. (1) We prove an impossibility theorem:
                 there exists no distributed graph simulation algorithm
                 that is parallel scalable in either response time or
                 data shipment. (2) However, we show that distributed
                 graph simulation is partition bounded, i.e., its
                 response time depends only on | Q |, | F$_m$ | and the
                 number | V$_f$ | of nodes in G with edges across
                 different fragments; and its data shipment depends on |
                 Q | and the number | E$_f$ | of crossing edges only. We
                 provide the first algorithms with these performance
                 guarantees. (3) We also identify special cases of
                 patterns and graphs when parallel scalability is
                 possible. (4) We experimentally verify the scalability
                 and efficiency of our algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nagel:2014:CGE,
  author =       "Fabian Nagel and Gavin Bierman and Stratis D. Viglas",
  title =        "Code generation for efficient query processing in
                 managed runtimes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1095--1106",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper we examine opportunities arising from
                 the convergence of two trends in data management:
                  in-memory database systems (IMDBs), which have received
                 renewed attention following the availability of
                 affordable, very large main memory systems; and
                 language-integrated query, which transparently
                 integrates database queries with programming languages
                 (thus addressing the famous 'impedance mismatch'
                 problem). Language-integrated query not only gives
                 application developers a more convenient way to query
                  external data sources like IMDBs, but also to use the
                 same querying language to query an application's
                 in-memory collections. The latter offers further
                 transparency to developers as the query language and
                 all data is represented in the data model of the host
                  programming language. However, compared to IMDBs, this
                 additional freedom comes at a higher cost for query
                 evaluation. Our vision is to improve in-memory query
                 processing of application objects by introducing
                 database technologies to managed runtimes. We focus on
                 querying and we leverage query compilation to improve
                 query processing on application objects. We explore
                 different query compilation strategies and study how
                 they improve the performance of query processing over
                 application data. We take C\# as the host programming
                 language as it supports language-integrated query
                  through the LINQ framework. Our techniques deliver
                 significant performance improvements over the default
                  LINQ implementation. Our work makes important first
                 steps towards a future where data processing
                 applications will commonly run on machines that can
                 store their entire datasets in-memory, and will be
                 written in a single programming language employing
                  language-integrated query and IMDB-inspired runtimes to
                 provide transparent and highly efficient querying.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2014:AED,
  author =       "Weimo Liu and Saravanan Thirumuruganathan and Nan
                 Zhang and Gautam Das",
  title =        "Aggregate estimation over dynamic hidden web
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1107--1118",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many databases on the web are ``hidden'' behind (i.e.,
                 accessible only through) their restrictive, form-like,
                 search interfaces. Recent studies have shown that it is
                 possible to estimate aggregate query answers over such
                 hidden web databases by issuing a small number of
                 carefully designed search queries through the
                 restrictive web interface. A problem with these
                  existing works, however, is that they all assume the
                 underlying database to be static, while most real-world
                 web databases (e.g., Amazon, eBay) are frequently
                 updated. In this paper, we study the novel problem of
                 estimating/tracking aggregates over dynamic hidden web
                 databases while adhering to the stringent query-cost
                 limitation they enforce (e.g., at most 1,000 search
                 queries per day). Theoretical analysis and extensive
                 real-world experiments demonstrate the effectiveness of
                 our proposed algorithms and their superiority over
                 baseline solutions (e.g., the repeated execution of
                 algorithms designed for static web databases).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Karpathiotakis:2014:AQP,
  author =       "Manos Karpathiotakis and Miguel Branco and Ioannis
                 Alagiannis and Anastasia Ailamaki",
  title =        "Adaptive query processing on {RAW} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1119--1130",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database systems deliver impressive performance for
                 large classes of workloads as the result of decades of
                 research into optimizing database engines. High
                 performance, however, is achieved at the cost of
                 versatility. In particular, database systems only
                 operate efficiently over loaded data, i.e., data
                 converted from its original raw format into the
                 system's internal data format. At the same time, data
                 volume continues to increase exponentially and data
                 varies increasingly, with an escalating number of new
                 formats. The consequence is a growing impedance
                 mismatch between the original structures holding the
                 data in the raw files and the structures used by query
                 engines for efficient processing. In an ideal scenario,
                 the query engine would seamlessly adapt itself to the
                 data and ensure efficient query processing regardless
                 of the input data formats, optimizing itself to each
                 instance of a file and of a query by leveraging
                 information available at query time. Today's systems,
                 however, force data to adapt to the query engine during
                 data loading. This paper proposes adapting the query
                 engine to the formats of raw data. It presents RAW, a
                 prototype query engine which enables querying
                 heterogeneous data sources transparently. RAW employs
                 Just-In-Time access paths, which efficiently couple
                 heterogeneous raw files to the query engine and reduce
                 the overheads of traditional general-purpose scan
                 operators. There are, however, inherent overheads with
                 accessing raw data directly that cannot be eliminated,
                 such as converting the raw values. Therefore, RAW also
                 uses column shreds, ensuring that we pay these costs
                 only for the subsets of raw data strictly needed by a
                 query. We use RAW in a real-world scenario and achieve
                  a two-order-of-magnitude speedup against the existing
                 hand-written solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Afrati:2014:SQT,
  author =       "Foto N. Afrati and Dan Delorey and Mosha Pasumansky
                 and Jeffrey D. Ullman",
  title =        "Storing and querying tree-structured records in
                 {Dremel}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1131--1142",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In Dremel, data is stored as nested relations. The
                 schema for a relation is a tree, all of whose nodes are
                 attributes, and whose leaf attributes hold values. We
                 explore filter and aggregate queries that are given in
                 the Dremel dialect of SQL. Complications arise because
                 of repeated attributes, i.e., attributes that are
                 allowed to have more than one value. We focus on the
                 common class of Dremel queries that are processed on
                 column-stored data in a way that results in query
                 processing time that is linear on the size of the
                 relevant data, i.e., data in the columns that
                 participate in the query. We formally define the data
                 model, the query language and the algorithms for query
                 processing in column-stored data. The concepts of
                 repetition context and semi-flattening are introduced
                 here and play a central role in understanding this
                 class of queries and their algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Starlinger:2014:SSS,
  author =       "Johannes Starlinger and Bryan Brancotte and Sarah
                 Cohen-Boulakia and Ulf Leser",
  title =        "Similarity search for scientific workflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1143--1154",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increasing popularity of scientific
                 workflows, public repositories are gaining importance
                 as a means to share, find, and reuse such workflows. As
                 the sizes of these repositories grow, methods to
                 compare the scientific workflows stored in them become
                 a necessity, for instance, to allow duplicate detection
                 or similarity search. Scientific workflows are complex
                 objects, and their comparison entails a number of
                 distinct steps from comparing atomic elements to
                 comparison of the workflows as a whole. Various studies
                 have implemented methods for scientific workflow
                 comparison and came up with often contradicting
                 conclusions upon which algorithms work best. Comparing
                 these results is cumbersome, as the original studies
                 mixed different approaches for different steps and used
                 different evaluation data and metrics. We contribute to
                 the field (i) by dissecting each previous approach into
                 an explicitly defined and comparable set of subtasks,
                 (ii) by comparing in isolation different approaches
                 taken at each step of scientific workflow comparison,
                  reporting on a number of unexpected findings, (iii) by
                 investigating how these can best be combined into
                 aggregated measures, and (iv) by making available a
                 gold standard of over 2000 similarity ratings
                 contributed by 15 workflow experts on a corpus of
                 almost 1500 workflows and re-implementations of all
                 methods we evaluated.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kellaris:2014:DPE,
  author =       "Georgios Kellaris and Stavros Papadopoulos and Xiaokui
                 Xiao and Dimitris Papadias",
  title =        "Differentially private event sequences over infinite
                 streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1155--1166",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Numerous applications require continuous publication
                  of statistics for monitoring purposes, such as real-time
                 traffic analysis, timely disease outbreak discovery,
                 and social trends observation. These statistics may be
                 derived from sensitive user data and, hence,
                 necessitate privacy preservation. A notable paradigm
                 for offering strong privacy guarantees in statistics
                  publishing is $ \epsilon $-differential privacy. However,
                 there is limited literature that adapts this concept to
                 settings where the statistics are computed over an
                 infinite stream of ``events'' (i.e., data items
                 generated by the users), and published periodically.
                 These works aim at hiding a single event over the
                 entire stream. We argue that, in most practical
                 scenarios, sensitive information is revealed from
                 multiple events occurring at contiguous time instances.
                  Towards this end, we put forth the novel notion of
                  $w$-event privacy over infinite streams, which protects
                 any event sequence occurring in $w$ successive time
                 instants. We first formulate our privacy concept,
                 motivate its importance, and introduce a methodology
                 for achieving it. We next design two instantiations,
                 whose utility is independent of the stream length.
                 Finally, we confirm the practicality of our solutions
                 experimenting with real data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Londhe:2014:MTC,
  author =       "Nikhil Londhe and Vishrawas Gopalakrishnan and Aidong
                 Zhang and Hung Q. Ngo and Rohini Srihari",
  title =        "Matching titles with cross title web-search enrichment
                 and community detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1167--1178",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Title matching refers roughly to the following
                 problem. We are given two strings of text obtained from
                 different data sources. The texts refer to some
                 underlying physical entities and the problem is to
                 report whether the two strings refer to the same
                 physical entity or not. There are manifestations of
                 this problem in a variety of domains, such as product
                 or bibliography matching, and location or person
                 disambiguation. We propose a new approach to solving
                 this problem, consisting of two main components. The
                 first component uses Web searches to ``enrich'' the
                 given pair of titles: making titles that refer to the
                 same physical entity more similar, and those which do
                 not, much less similar. A notion of similarity is then
                 measured using the second component, where the tokens
                 from the two titles are modelled as vertices of a
                 ``social'' network graph. A ``strength of ties'' style
                 of clustering algorithm is then applied on this to see
                 whether they form one cohesive ``community'' (matching
                 titles), or separately clustered communities
                 (mismatching titles). Experimental results confirm the
                 effectiveness of our approach over existing title
                 matching methods across several input domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Song:2014:CSR,
  author =       "Shaoxu Song and Lei Chen and Hong Cheng",
  title =        "On concise set of relative candidate keys",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1179--1190",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Matching keys, specifying what attributes to compare
                 and how to compare them for identifying the same
                 real-world entities, are found to be useful in
                 applications like record matching, blocking and
                 windowing [7]. Owing to the complex redundant semantics
                 among matching keys, capturing a proper set of matching
                 keys is highly non-trivial. Analogous to
                 minimal/candidate keys w.r.t. functional dependencies,
                 relative candidate keys (RCKs [7], with a minimal
                 number of compared attributes, see a more formal
                 definition in Section 2) can clear up redundant
                 semantics w.r.t. ``what attributes to compare''.
                 However, we note that redundancy issues may still exist
                  among RCKs on the same attributes about ``how to
                 compare them''. In this paper, we propose to find a
                 concise set of matching keys, which has less redundancy
                 and can still meet the requirements on coverage and
                 validity. Specifically, we study approximation
                 algorithms to efficiently discover a near optimal set.
                 To ensure the quality of matching keys, the returned
                 results are guaranteed to be RCKs (minimal on compared
                 attributes), and most importantly, minimal w.r.t.
                 distance restrictions (i.e., redundancy free w.r.t.
                 ``how to compare the attributes''). The experimental
                 evaluation demonstrates that our concise RCK set is
                  more effective than the existing RCK choosing method.
                 Moreover, the proposed pruning methods show up to 2
                 orders of magnitude improvement w.r.t. time costs on
                 concise RCK set discovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wei:2014:RQI,
  author =       "Hao Wei and Jeffrey Xu Yu and Can Lu and Ruoming Jin",
  title =        "Reachability querying: an independent permutation
                 labeling approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1191--1202",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Reachability query is a fundamental graph operation
                 which answers whether a vertex can reach another vertex
                 over a large directed graph G with $n$ vertices and $m$
                 edges, and has been extensively studied. In the
                 literature, all the approaches compute a label for
                 every vertex in a graph G by index construction
                 offline. The query time for answering reachability
                 queries online is affected by the quality of the labels
                 computed in index construction. The three main costs
                 are the index construction time, the index size, and
                 the query time. Some of the up-to-date approaches can
                 answer reachability queries efficiently, but spend
                 non-linear time to construct an index. Some of the
                 up-to-date approaches construct an index in linear time
                 and space, but may need to depth-first search G at
                 run-time in $ O(n + m)$. In this paper, as the first,
                 we propose a new randomized labeling approach to answer
                 reachability queries, and the randomness is by
                 independent permutation. We conduct extensive
                 experimental studies to compare with the up-to-date
                 approaches using 19 large real datasets used in the
                 existing work and synthetic datasets. We confirm the
                 efficiency of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2014:HDL,
  author =       "Minhao Jiang and Ada Wai-Chee Fu and Raymond Chi-Wing
                 Wong and Yanyan Xu",
  title =        "Hop doubling label indexing for point-to-point
                 distance querying on scale-free networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1203--1214",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of point-to-point distance
                 querying for massive scale-free graphs, which is
                 important for numerous applications. Given a directed
                 or undirected graph, we propose to build an index for
                 answering such queries based on a novel hop-doubling
                 labeling technique. We derive bounds on the index size,
                 the computation costs and I/O costs based on the
                 properties of unweighted scale-free graphs. We show
                 that our method is much more efficient and effective
                 compared to the state-of-the-art techniques, in terms
                 of both querying time and indexing costs. Our empirical
                 study shows that our method can handle graphs that are
                 orders of magnitude larger than existing methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Suchanek:2014:SC,
  author =       "Fabian M. Suchanek and Nicoleta Preda",
  title =        "Semantic culturomics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1215--1218",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Newspapers are testimonials of history. The same is
                 increasingly true of social media such as online
                 forums, online communities, and blogs. By looking at
                 the sequence of articles over time, one can discover
                 the birth and the development of trends that marked
                 society and history --- a field known as
                 ``Culturomics''. But Culturomics has so far been
                 limited to statistics on keywords. In this vision
                 paper, we argue that the advent of large knowledge
                 bases (such as YAGO [37], NELL [5], DBpedia [3], and
                 Freebase) will revolutionize the field. If their
                 knowledge is combined with the news articles, it can
                 breathe life into what is otherwise just a sequence of
                 words for a machine. This will allow discovering trends
                 in history and culture, explaining them through
                 explicit logical rules, and making predictions about
                 the events of the future. We predict that this could
                 open up a new field of research, ``Semantic
                 Culturomics'', in which no longer human text helps
                 machines build up knowledge bases, but knowledge bases
                 help humans understand their society.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kuhlenkamp:2014:BSE,
  author =       "J{\"o}rn Kuhlenkamp and Markus Klems and Oliver
                 R{\"o}ss",
  title =        "Benchmarking scalability and elasticity of distributed
                 database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1219--1230",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed database system performance benchmarks are
                 an important source of information for decision makers
                 who must select the right technology for their data
                 management problems. Since important decisions rely on
                 trustworthy experimental data, it is necessary to
                 reproduce experiments and verify the results. We
                 reproduce performance and scalability benchmarking
                 experiments of HBase and Cassandra that have been
                 conducted by previous research and compare the results.
                 The scope of our reproduced experiments is extended
                 with a performance evaluation of Cassandra on different
                 Amazon EC2 infrastructure configurations, and an
                 evaluation of Cassandra and HBase elasticity by
                 measuring scaling speed and performance impact while
                 scaling.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2014:BCQ,
  author =       "Yang Cao and Wenfei Fan and Tianyu Wo and Wenyuan Yu",
  title =        "Bounded conjunctive queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1231--1242",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A query Q is said to be effectively bounded if for all
                 datasets D, there exists a subset D$_Q$ of D such that
                 Q (D) = Q (D$_Q$), and the size of DQ and time for
                 fetching D$_Q$ are independent of the size of D. The
                 need for studying such queries is evident, since it
                 allows us to compute Q (D) by accessing a bounded
                 dataset D$_Q$, regardless of how big D is. This paper
                 investigates effectively bounded conjunctive queries
                 (SPC) under an access schema A, which specifies indices
                 and cardinality constraints commonly used. We provide
                 characterizations (sufficient and necessary conditions)
                 for determining whether an SPC query Q is effectively
                 bounded under A. We study several problems for deciding
                 whether Q is bounded, and if not, for identifying a
                 minimum set of parameters of Q to instantiate and make
                 Q bounded. We show that these problems range from
                 quadratic-time to NP-complete, and develop efficient
                 (heuristic) algorithms for them. We also provide an
                 algorithm that, given an effectively bounded SPC query
                 Q and an access schema A, generates a query plan for
                 evaluating Q by accessing a bounded amount of data in
                 any (possibly big) dataset. We experimentally verify
                 that our algorithms substantially reduce the cost of
                 query evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shanbhag:2014:OJE,
  author =       "Anil Shanbhag and S. Sudarshan",
  title =        "Optimizing join enumeration in transformation-based
                 query optimizers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1243--1254",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query optimizers built on the Volcano/Cascades
                 framework, which is based on transformation rules, are
                 used in many commercial databases. Transformation
                 rulesets proposed earlier for join order enumeration in
                 such a framework either allow enumeration of joins with
                 cross-products (which can significantly increase the
                 cost of optimization), or generate a large number of
                 duplicate derivations. In this paper we propose two new
                 rulesets for generating cross-product free trees. One
                 of the rulesets is a minor extension of a simple but
                 inefficient ruleset, which we prove is complete (we
                 also show that a naive extension of an efficient
                 ruleset leads to incompleteness). We then propose an
                 efficient new ruleset, which is based on techniques
                 proposed recently for top-down join order enumeration,
                 but unlike earlier work it is cleanly integrated into
                 the Volcano/Cascades framework, and can be used in
                 conjunction with other transformation rules. We show
                 that our ruleset is complete (i.e., it generates the
                 entire search space without cross products) while
                 avoiding inefficiency due to duplicate derivations. We
                 have implemented this ruleset in the PyroJ Optimizer
                 (an implementation of the Volcano optimizer framework)
                 and show that it significantly outperforms the
                 alternatives, in some cases by up to two orders of
                 magnitude, in terms of time taken.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jacob:2014:SMA,
  author =       "Marie Jacob and Benny Kimelfeld and Julia
                 Stoyanovich",
  title =        "A system for management and analysis of preference
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1255--1258",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Preference data arises in a wide variety of domains.
                 Over the past decade, we have seen a sharp increase in
                 the volume of preference data, in the diversity of
                 applications that use it, and in the richness of
                 preference data analysis methods. Examples of
                 applications include rank aggregation in genomic data
                 analysis, management of votes in elections, and
                 recommendation systems in e-commerce. However, little
                 attention has been paid to the challenges of building a
                 system for preference-data management, which would help
                 incorporate sophisticated analytics into larger
                 applications, support computational abstractions for
                 usability by data scientists, and enable scaling up to
                 modern volumes. This vision paper proposes a management
                 system for preference data that aims to address these
                 challenges. We adopt the relational database model, and
                 propose extensions that are specialized to handling
                 preference data. Specifically, we introduce a special
                 type of a relation that is designed for preference
                 data, and describe composable operators on preference
                 relations that can be embedded in SQL statements, for
                 convenient reuse across applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gupta:2014:MGR,
  author =       "Ashish Gupta and Fan Yang and Jason Govig and Adam
                 Kirsch and Kelvin Chan and Kevin Lai and Shuo Wu and
                 Sandeep Govind Dhoot and Abhilash Rajesh Kumar and
                 Ankur Agiwal and Sanjay Bhansali and Mingsheng Hong and
                 Jamie Cameron and Masood Siddiqi and David Jones and
                 Jeff Shute and Andrey Gubarev and Shivakumar
                 Venkataraman and Divyakant Agrawal",
  title =        "{Mesa}: geo-replicated, near real-time, scalable data
                 warehousing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1259--1270",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Mesa is a highly scalable analytic data warehousing
                 system that stores critical measurement data related to
                 Google's Internet advertising business. Mesa is
                 designed to satisfy a complex and challenging set of
                 user and systems requirements, including near real-time
                 data ingestion and queryability, as well as high
                 availability, reliability, fault tolerance, and
                 scalability for large data and query volumes.
                 Specifically, Mesa handles petabytes of data, processes
                 millions of row updates per second, and serves billions
                 of queries that fetch trillions of rows per day. Mesa
                 is geo-replicated across multiple datacenters and
                 provides consistent and repeatable query answers at low
                 latency, even when an entire datacenter fails. This
                 paper presents the Mesa system and reports the
                 performance and scale that it achieves.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liagouris:2014:EES,
  author =       "John Liagouris and Nikos Mamoulis and Panagiotis
                 Bouros and Manolis Terrovitis",
  title =        "An effective encoding scheme for spatial {RDF} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1271--1282",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The RDF data model has recently been extended to
                 support representation and querying of spatial
                 information (i.e., locations and geometries), which is
                 associated with RDF entities. Still, there are limited
                 efforts towards extending RDF stores to efficiently
                 support spatial queries, such as range selections
                 (e.g., find entities within a given range) and spatial
                 joins (e.g., find pairs of entities whose locations are
                 close to each other). In this paper, we propose an
                 extension for RDF stores that supports efficient
                 spatial data management. Our contributions include an
                 effective encoding scheme for entities having spatial
                 locations, the introduction of on-the-fly spatial
                 filters and spatial join algorithms, and several
                 optimizations that minimize the overhead of geometry
                 and dictionary accesses. We implemented the proposed
                 techniques as an extension to the open-source RDF-3X
                 engine and we experimentally evaluated them using real
                 RDF knowledge bases. The results show that our system
                 offers robust performance for spatial queries, while
                 introducing little overhead to the original query
                 engine.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:DSM,
  author =       "Ce Zhang and Christopher R{\'e}",
  title =        "{DimmWitted}: a study of main-memory statistical
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1283--1294",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We perform the first study of the tradeoff space of
                 access methods and replication to support statistical
                 analytics using first-order methods executed in the
                 main memory of a Non-Uniform Memory Access (NUMA)
                 machine. Statistical analytics systems differ from
                 conventional SQL-analytics in the amount and types of
                 memory incoherence that they can tolerate. Our goal is
                 to understand tradeoffs in accessing the data in row-
                 or column-order and at what granularity one should
                 share the model and data for a statistical task. We
                 study this new tradeoff space and discover that there
                 are tradeoffs between hardware and statistical
                 efficiency. We argue that our tradeoff study may
                 provide valuable information for designers of analytics
                 engines: for each system we consider, our prototype
                 engine can run at least one popular task at least 100$
                 \times $ faster. We conduct our study across five
                 architectures using popular models, including SVMs,
                 logistic regression, Gibbs sampling, and neural
                 networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Floratou:2014:SHF,
  author =       "Avrilia Floratou and Umar Farooq Minhas and Fatma
                 {\"O}zcan",
  title =        "{SQL-on-Hadoop}: full circle back to shared-nothing
                 database architectures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1295--1306",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SQL query processing for analytics over Hadoop data
                 has recently gained significant traction. Among many
                 systems providing some SQL support over Hadoop, Hive is
                 the first native Hadoop system that uses an underlying
                 framework such as MapReduce or Tez to process SQL-like
                 statements. Impala, on the other hand, represents the
                 new emerging class of SQL-on-Hadoop systems that
                 exploit a shared-nothing parallel database architecture
                 over Hadoop. Both systems optimize their data ingestion
                 via columnar storage, and promote different file
                 formats: ORC and Parquet. In this paper, we compare the
                 performance of these two systems by conducting a set of
                 cluster experiments using a TPC-H like benchmark and
                 two TPC-DS inspired workloads. We also closely study
                 the I/O efficiency of their columnar formats using a
                 set of micro-benchmarks. Our results show that Impala
                 is 3.3 X to 4.4 X faster than Hive on MapReduce and 2.1
                 X to 2.8 X faster than Hive on Tez for the overall TPC-H
                 experiments. Impala is also 8.2 X to 10 X faster than
                 Hive on MapReduce and about 4.3 X faster than Hive on
                 Tez for the TPC-DS inspired experiments. Through
                 detailed analysis of experimental results, we identify
                 the reasons for this performance gap and examine the
                 strengths and limitations of each system.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guarnieri:2014:OSA,
  author =       "Marco Guarnieri and David Basin",
  title =        "Optimal security-aware query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "12",
  pages =        "1307--1318",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:26 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Security-Aware Query Processing is the problem of
                 computing answers to queries in the presence of access
                 control policies. We present general impossibility
                 results for the existence of optimal algorithms for
                 Security-Aware Query Processing and classify query
                 languages for which such algorithms exist. In
                 particular, we show that for the relational calculus
                 there are no optimal algorithms, whereas optimal
                 algorithms exist for some of its fragments, such as the
                 existential fragment. We also establish relationships
                 between two different models of Fine-Grained Access
                 Control, called Truman and Non-Truman models, which
                 have been previously presented in the literature as
                 distinct. For optimal Security-Aware Query Processing,
                 we show that the Non-Truman model is a special case of
                 the Truman model for boolean queries in the relational
                 calculus, moreover the two models coincide for more
                 powerful languages, such as the relational calculus
                 with aggregation operators. In contrast, these two
                 models are distinct for non-boolean queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shi:2014:MTE,
  author =       "Juwei Shi and Jia Zou and Jiaheng Lu and Zhao Cao and
                 Shiqiang Li and Chen Wang",
  title =        "{MRTuner}: a toolkit to enable holistic optimization
                 for {MapReduce} jobs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1319--1330",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MapReduce based data-intensive computing solutions are
                 increasingly deployed as production systems. Unlike
                 Internet companies who invent and adopt the technology
                 from the very beginning, traditional enterprises demand
                 easy-to-use software due to the limited capabilities of
                 administrators. Automatic job optimization software for
                 MapReduce is a promising technique to satisfy such
                 requirements. In this paper, we introduce a toolkit
                 from IBM, called MRTuner, to enable holistic
                 optimization for MapReduce jobs. In particular, we
                 propose a novel Producer-Transporter-Consumer (PTC)
                 model, which characterizes the tradeoffs in the
                 parallel execution among tasks. We also carefully
                 investigate the complicated relations among about
                 twenty parameters, which have significant impact on the
                 job performance. We design an efficient search
                 algorithm to find the optimal execution plan. Finally,
                 we conduct a thorough experimental evaluation on two
                 different types of clusters using the HiBench suite
                 which covers various Hadoop workloads from GB to TB
                 size levels. The results show that the search latency
                 of MRTuner is a few orders of magnitude faster than
                 that of the state-of-the-art cost-based optimizer, and
                 the effectiveness of the optimized execution plan is
                 also significantly improved.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sadoghi:2014:RDL,
  author =       "Mohammad Sadoghi and Mustafa Canim and Bishwaranjan
                 Bhattacharjee and Fabian Nagel and Kenneth A. Ross",
  title =        "Reducing database locking contention through
                 multi-version concurrency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1331--1342",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In multi-version databases, updates and deletions of
                 records by transactions require appending a new record
                 to tables rather than performing in-place updates. This
                 mechanism incurs non-negligible performance overhead in
                 the presence of multiple indexes on a table, where
                 changes need to be propagated to all indexes.
                 Additionally, an uncommitted record update will block
                 other active transactions from using the index to fetch
                 the most recently committed values for the updated
                 record. In general, in order to support snapshot
                 isolation and/or multi-version concurrency, either each
                 active transaction is forced to search a database
                 temporary area (e.g., roll-back segments) to fetch old
                 values of desired records, or each transaction is
                 forced to scan the entire table to find the older
                 versions of the record in a multi-version database (in
                 the absence of specialized temporal indexes). In this
                 work, we describe a novel kV-Indirection structure to
                 enable efficient (parallelizable) optimistic and
                 pessimistic multi-version concurrency control by
                 utilizing the old versions of records (at most two
                 versions of each record) to provide direct access to
                 the recent changes of records without the need of
                 temporal indexes. As a result, our technique results in
                 higher degree of concurrency by reducing the clashes
                 between readers and writers of data and avoiding
                 extended lock delays. We have a working prototype of
                 our concurrency model and kV-Indirection structure in a
                 commercial database and conducted an extensive
                 evaluation to demonstrate the benefits of our
                 multi-version concurrency control, and we obtained
                 orders of magnitude speed up over the single-version
                 concurrency control.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Su:2014:CEM,
  author =       "Xueyuan Su and Garret Swart and Brian Goetz and Brian
                 Oliver and Paul Sandoz",
  title =        "Changing engines in midstream: a {Java} stream
                 computational model for big data processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1343--1354",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the addition of lambda expressions and the Stream
                 API in Java 8, Java has gained a powerful and
                 expressive query language that operates over in-memory
                 collections of Java objects, making the transformation
                 and analysis of data more convenient, scalable and
                 efficient. In this paper, we build on Java 8 Stream and
                 add a DistributableStream abstraction that supports
                 federated query execution over an extensible set of
                 distributed compute engines. Each query eventually
                 results in the creation of a materialized result that
                 is returned either as a local object or as an engine
                 defined distributed Java Collection that can be saved
                 and/or used as a source for future queries.
                 Distinctively, DistributableStream supports the
                 changing of compute engines both between and within a
                 query, allowing different parts of a computation to be
                 executed on different platforms. At execution time, the
                 query is organized as a sequence of pipelined stages,
                 each stage potentially running on a different engine.
                 Each node that is part of a stage executes its portion
                 of the computation on the data available locally or
                 produced by the previous stage of the computation. This
                 approach allows for computations to be assigned to
                 engines based on pricing, data locality, and resource
                 availability. Coupled with the inherent laziness of
                 stream operations, this brings great flexibility to
                 query planning and separates the semantics of the query
                 from the details of the engine used to execute it. We
                 currently support three engines, Local, Apache Hadoop
                 MapReduce and Oracle Coherence, and we illustrate how
                 new engines and data sources can be added.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2014:JEP,
  author =       "Jae-Gil Lee and Gopi Attaluri and Ronald Barber and
                 Naresh Chainani and Oliver Draese and Frederick Ho and
                 Stratos Idreos and Min-Soo Kim and Sam Lightstone and
                 Guy Lohman and Konstantinos Morfonios and Keshava
                 Murthy and Ippokratis Pandis and Lin Qiao and
                 Vijayshankar Raman and Vincent Kulandai Samy and
                 Richard Sidle and Knut Stolze and Liping Zhang",
  title =        "Joins on encoded and partitioned data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1355--1366",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Compression has historically been used to reduce the
                 cost of storage, I/Os from that storage, and buffer
                 pool utilization, at the expense of the CPU required to
                 decompress data every time it is queried. However,
                 significant additional CPU efficiencies can be achieved
                 by deferring decompression as late in query processing
                 as possible and performing query processing operations
                 directly on the still-compressed data. In this paper,
                 we investigate the benefits and challenges of
                 performing joins on compressed (or encoded) data. We
                 demonstrate the benefit of independently optimizing the
                 compression scheme of each join column, even though
                 join predicates relating values from multiple columns
                 may require translation of the encoding of one join
                 column into the encoding of the other. We also show the
                 benefit of compressing ``payload'' data other than the
                 join columns ``on the fly,'' to minimize the size of
                 hash tables used in the join. By partitioning the
                 domain of each column and defining separate
                 dictionaries for each partition, we can achieve even
                 better overall compression as well as increased
                 flexibility in dealing with new values introduced by
                 updates. Instead of decompressing both join columns
                 participating in a join to resolve their different
                 compression schemes, our system performs a light-weight
                 mapping of only qualifying rows from one of the join
                 columns to the encoding space of the other at run time.
                 Consequently, join predicates can be applied directly
                 on the compressed data. We call this procedure encoding
                 translation. Two alternatives of encoding translation
                 are developed and compared in the paper. We provide a
                 comprehensive evaluation of these alternatives using
                 product implementations of each on the TPC-H data set,
                 and demonstrate that performing joins on encoded and
                 partitioned data achieves both superior performance and
                 excellent compression.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Poess:2014:TFI,
  author =       "Meikel Poess and Tilmann Rabl and Hans-Arno Jacobsen
                 and Brian Caufield",
  title =        "{TPC--DI}: the first industry benchmark for data
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1367--1378",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Historically, the process of synchronizing a decision
                 support system with data from operational systems has
                 been referred to as Extract, Transform, Load (ETL) and
                 the tools supporting such process have been referred to
                 as ETL tools. Recently, ETL was replaced by the more
                 comprehensive acronym, data integration (DI). DI
                 describes the process of extracting and combining data
                 from a variety of data source formats, transforming
                 that data into a unified data model representation and
                 loading it into a data store. This is done in the
                 context of a variety of scenarios, such as data
                 acquisition for business intelligence, analytics and
                 data warehousing, but also synchronization of data
                 between operational applications, data migrations and
                 conversions, master data management, enterprise data
                 sharing and delivery of data services in a
                 service-oriented architecture context, amongst others.
                 With these scenarios relying on up-to-date information
                 it is critical to implement a highly performing,
                 scalable and easy to maintain data integration system.
                 This is especially important as the complexity, variety
                 and volume of data is constantly increasing and
                 performance of data integration systems is becoming
                 very critical. Despite the significance of having a
                 highly performing DI system, there has been no industry
                 standard for measuring and comparing their performance.
                 The TPC, acknowledging this void, has released TPC-DI,
                 an innovative benchmark for data integration. This
                 paper motivates the reasons behind its development,
                 describes its main characteristics including workload,
                 run rules, metric, and explains key decisions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: Twitter's production system for online
%% motif detection in large dynamic graphs (2-page paper, pages 1379--1380).
@Article{Gupta:2014:RTT,
  author =       "Pankaj Gupta and Venu Satuluri and Ajeet Grewal and
                 Siva Gurumurthy and Volodymyr Zhabiuk and Quannan Li
                 and Jimmy Lin",
  title =        "Real-time {Twitter} recommendation: online motif
                 detection in large dynamic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1379--1380",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We describe a production Twitter system for generating
                 relevant, personalized, and timely recommendations
                 based on observing the temporally-correlated actions of
                 each user's followings. The system currently serves
                 millions of recommendations daily to tens of millions
                 of mobile users. The approach can be viewed as a
                 specific instance of the novel problem of online motif
                 detection in large dynamic graphs. Our current solution
                 partitions the graph across a number of machines, and
                 with the construction of appropriate data structures,
                 motif detection can be translated into the lookup and
                 intersection of adjacency lists in each partition. We
                 conclude by discussing a generalization of the problem
                 that perhaps represents a new class of data management
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: the interval disaggregate planning
%% operator, implemented in the SAP HANA planning engine.
@Article{Cha:2014:IDN,
  author =       "Sang K. Cha and Kunsoo Park and Changbin Song and
                 Kihong Kim and Cheol Ryu and Sunho Lee",
  title =        "Interval disaggregate: a new operator for business
                 planning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1381--1392",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Business planning as well as analytics on top of
                 large-scale database systems is valuable to decision
                 makers, but planning operations known and implemented
                 so far are very basic. In this paper we propose a new
                 planning operation called interval disaggregate, which
                 goes as follows. Suppose that the planner, typically
                 the management of a company, plans sales revenues of
                 its products in the current year. An interval of the
                 expected revenue for each product in the current year
                 is computed from historical data in the database as the
                 prediction interval of linear regression on the data. A
                 total target revenue for the current year is given by
                 the planner. The goal of the interval disaggregate
                 operation is to find an appropriate disaggregation of
                 the target revenue, considering the intervals. We
                 formulate the problem of interval disaggregation more
                 precisely and give solutions for the problem.
                 Multidimensional geometry plays a crucial role in the
                 problem formulation and the solutions. We implemented
                 interval disaggregation into the planning engine of SAP
                 HANA and did experiments on real-world data. Our
                 experiments show that interval disaggregation gives
                 more appropriate solutions with respect to historical
                 data than the known basic disaggregation called
                 referential disaggregation. We also show that interval
                 disaggregation can be combined with the
                 deseasonalization technique when the dataset shows
                 seasonal fluctuations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: Alibaba's Fuxi resource management and
%% job scheduling system.  Repaired a line-break/OCR garble in the
%% abstract: ``2.36T-B/minute'' -> ``2.36 TB/minute'' (GraySort
%% throughput, as in the published abstract).
@Article{Zhang:2014:FFT,
  author =       "Zhuo Zhang and Chao Li and Yangyu Tao and Renyu Yang
                 and Hong Tang and Jie Xu",
  title =        "{Fuxi}: a fault-tolerant resource management and job
                 scheduling system at {Internet} scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1393--1404",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Scalability and fault-tolerance are two fundamental
                 challenges for all distributed computing at Internet
                 scale. Despite many recent advances from both academia
                 and industry, these two problems are still far from
                 settled. In this paper, we present Fuxi, a resource
                 management and job scheduling system that is capable of
                 handling the kind of workload at Alibaba where hundreds
                 of terabytes of data are generated and analyzed
                 everyday to help optimize the company's business
                 operations and user experiences. We employ several
                 novel techniques to enable Fuxi to perform efficient
                 scheduling of hundreds of thousands of concurrent tasks
                 over large clusters with thousands of nodes: (1) an
                 incremental resource management protocol that supports
                 multi-dimensional resource allocation and data
                 locality; (2) user-transparent failure recovery where
                 failures of any Fuxi components will not impact the
                 execution of user jobs; and (3) an effective detection
                 mechanism and a multi-level blacklisting scheme that
                 prevents them from affecting job execution. Our
                 evaluation results demonstrate that 95\% and 91\%
                 scheduled CPU/memory utilization can be fulfilled under
                 synthetic workloads, and Fuxi is capable of achieving
                 2.36 TB/minute throughput in GraySort. Additionally,
                 the same Fuxi job only experiences approximately 16\%
                 slowdown under a 5\% fault-injection rate. The slowdown
                 only grows to 20\% when we double the fault-injection
                 rate to 10\%. Fuxi has been deployed in our production
                 environment since 2009, and it now manages hundreds of
                 thousands of server nodes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: large-scale graph analytics in
%% Teradata Aster 6.0.
@Article{Simmen:2014:LSG,
  author =       "David Simmen and Karl Schnaitter and Jeff Davis and
                 Yingjie He and Sangeet Lohariwala and Ajay Mysore and
                 Vinayak Shenoi and Mingfeng Tan and Yu Xiao",
  title =        "Large-scale graph analytics in {Aster 6}: bringing
                 context to big data discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1405--1416",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph analytics is an important big data discovery
                 technique. Applications include identifying influential
                 employees for retention, detecting fraud in a complex
                 interaction network, and determining product affinities
                 by exploiting community buying patterns. Specialized
                 platforms have emerged to satisfy the unique processing
                 requirements of large-scale graph analytics; however,
                 these platforms do not enable graph analytics to be
                 combined with other analytics techniques, nor do they
                 work well with the vast ecosystem of SQL-based business
                 applications. Teradata Aster 6.0 adds support for
                 large-scale graph analytics to its repertoire of
                 analytics capabilities. The solution extends the
                 multi-engine processing architecture with support for
                 bulk synchronous parallel execution, and a specialized
                 graph engine that enables iterative analysis of graph
                 structures. Graph analytics functions written to the
                 vertex-oriented API exposed by the graph engine can be
                 invoked from the context of an SQL query and composed
                 with existing SQL-MR functions, thereby enabling data
                 scientists and business applications to express
                 computations that combine large-scale graph analytics
                 with techniques better suited to a different style of
                 processing. The solution includes a suite of pre-built
                 graph analytic functions adapted for parallel
                 execution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: foreign-key detection in Microsoft
%% PowerPivot for Excel.
%% NOTE(review): the abstract reads ``are often not be sophisticated'';
%% this looks like a transcription typo for ``are often not
%% sophisticated'' --- verify against the published abstract before
%% altering the quoted text.
@Article{Chen:2014:FFK,
  author =       "Zhimin Chen and Vivek Narasayya and Surajit
                 Chaudhuri",
  title =        "Fast foreign-key detection in {Microsoft SQL} server
                 {PowerPivot} for {Excel}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1417--1428",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Microsoft SQL Server PowerPivot for Excel, or
                 PowerPivot for short, is an in-memory business
                 intelligence (BI) engine that enables Excel users to
                 interactively create pivot tables over large data sets
                 imported from sources such as relational databases,
                 text files and web data feeds. Unlike traditional pivot
                 tables in Excel that are defined on a single table,
                 PowerPivot allows analysis over multiple tables
                 connected via foreign-key joins. In many cases however,
                 these foreign-key relationships are not known a priori,
                 and information workers are often not be sophisticated
                 enough to define these relationships. Therefore, the
                 ability to automatically discover foreign-key
                 relationships in PowerPivot is valuable, if not
                 essential. The key challenge is to perform this
                 detection interactively and with high precision even
                 when data sets scale to hundreds of millions of rows
                 and the schema contains tens of tables and hundreds of
                 columns. In this paper, we describe techniques for fast
                 foreign-key detection in PowerPivot and experimentally
                 evaluate its accuracy, performance and scale on both
                 synthetic benchmarks and real-world data sets. These
                 techniques have been incorporated into PowerPivot for
                 Excel.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: low-power transportation-mode
%% classifier for wearables (hardware--software co-design).
@Article{Yu:2014:BDS,
  author =       "Meng-Chieh Yu and Tong Yu and Shao-Chen Wang and
                 Chih-Jen Lin and Edward Y. Chang",
  title =        "Big data small footprint: the design of a low-power
                 classifier for detecting transportation modes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1429--1440",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Sensors on mobile phones and wearables, and in general
                 sensors on IoT (Internet of Things), bring forth a
                 couple of new challenges to big data research. First,
                 the power consumption for analyzing sensor data must be
                 low, since most wearables and portable devices are
                 power-strapped. Second, the velocity of analyzing big
                 data on these devices must be high, otherwise the
                 limited local storage may overflow. This paper presents
                 our hardware-software co-design of a classifier for
                 wearables to detect a person's transportation mode
                 (i.e., still, walking, running, biking, and on a
                 vehicle). We particularly focus on addressing the
                 big-data small-footprint requirement by designing a
                 classifier that is low in both computational complexity
                 and memory requirement. Together with a sensor-hub
                 configuration, we are able to drastically reduce power
                 consumption by 99\%, while maintaining competitive
                 mode-detection accuracy. The data used in the paper is
                 made publicly available for conducting research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: Summingbird, Twitter's Scala DSL
%% unifying batch (Hadoop) and online (Storm) MapReduce.
@Article{Boykin:2014:SFI,
  author =       "Oscar Boykin and Sam Ritchie and Ian O'Connell and
                 Jimmy Lin",
  title =        "{Summingbird}: a framework for integrating batch and
                 online {MapReduce} computations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1441--1451",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Summingbird is an open-source domain-specific language
                 implemented in Scala and designed to integrate online
                 and batch MapReduce computations in a single framework.
                 Summingbird programs are written using dataflow
                 abstractions such as sources, sinks, and stores, and
                 can run on different execution platforms: Hadoop for
                 batch processing (via Scalding/Cascading) and Storm for
                 online processing. Different execution modes require
                 different bindings for the dataflow abstractions (e.g.,
                 HDFS files or message queues for the source) but do not
                 require any changes to the program logic. Furthermore,
                 Summingbird can operate in a hybrid processing mode
                 that transparently integrates batch and online results
                 to efficiently generate up-to-date aggregations over
                 long time spans. The language was designed to improve
                 developer productivity and address pain points in
                 building analytics solutions at Twitter where often,
                 the same code needs to be written twice (once for batch
                 processing and again for online processing) and
                 indefinitely maintained in parallel. Our key insight is
                 that certain algebraic structures provide the
                 theoretical foundation for integrating batch and online
                 processing in a seamless fashion. This means that
                 Summingbird imposes constraints on the types of
                 aggregations that can be performed, although in
                 practice we have not found these constraints to be
                 overly restrictive for a broad range of analytics tasks
                 at Twitter.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: bushy join-tree optimization for
%% snowstorm queries in the Oracle optimizer.
@Article{Ahmed:2014:SBT,
  author =       "Rafi Ahmed and Rajkumar Sen and Meikel Poess and Sunil
                 Chakkappen",
  title =        "Of snowstorms and bushy trees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1452--1461",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many workloads for analytical processing in commercial
                 RDBMSs are dominated by snowstorm queries, which are
                 characterized by references to multiple large fact
                 tables and their associated smaller dimension tables.
                 This paper describes a technique for bushy join tree
                 optimization for snowstorm queries in Oracle database
                 system. This technique generates bushy join trees
                 containing subtrees that produce substantially reduced
                 sets of rows and, therefore, their joins with other
                 subtrees are generally much more efficient than joins
                 in the left-deep trees. The generation of bushy join
                 trees within an existing commercial physical optimizer
                 requires extensive changes to the optimizer. Further,
                 the optimizer will have to consider a large join
                 permutation search space to generate efficient bushy
                 join trees. The novelty of the approach is that bushy
                 join trees can be generated outside the physical
                 optimizer using logical query transformation that
                 explores a considerably pruned search space. The paper
                 describes an algorithm for generating optimal bushy
                 join trees for snowstorm queries using an existing
                 query transformation framework. It also presents
                 performance results for this optimization, which show
                 significant execution time improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: Rubix, LinkedIn's execution primitives
%% for scalable joins and aggregations on MapReduce.
@Article{Vemuri:2014:EPS,
  author =       "Srinivas Vemuri and Maneesh Varshney and Krishna
                 Puttaswamy and Rui Liu",
  title =        "Execution primitives for scalable joins and
                 aggregations in {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1462--1473",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analytics on Big Data is critical to derive business
                 insights and drive innovation in today's Internet
                 companies. Such analytics involve complex computations
                 on large datasets, and are typically performed on
                 MapReduce based frameworks such as Hive and Pig.
                 However, in our experience, these systems are still
                 quite limited in performing at scale. In particular,
                 calculations that involve complex joins and
                 aggregations, e.g. statistical calculations, scale
                 poorly on these systems. In this paper we propose novel
                 primitives for scaling such calculations. We propose a
                 new data model for organizing datasets into calculation
                 data units that are organized based on user-defined
                 cost functions. We propose new operators that take
                 advantage of these organized data units to
                 significantly speed up joins and aggregations. Finally,
                 we propose strategies for dividing the aggregation load
                 uniformly across worker processes that are very
                 effective in avoiding skews and reducing (or in some
                 cases even removing) the associated overheads. We have
                 implemented all our proposed primitives in a framework
                 called Rubix, which has been in production at LinkedIn
                 for nearly a year. Rubix powers several applications
                 and processes TBs of data each day. We have seen
                 remarkable improvements in speed and cost of complex
                 calculations due to these primitives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: CAP-theorem trade-offs in distributed
%% telecom subscriber database design (single-author paper).
@Article{Arauz:2014:CLT,
  author =       "Javier Arauz",
  title =        "{CAP} limits in telecom subscriber database design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1474--1483",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While the notion of a Distributed DBMS has been
                 familiar to the IT industry for several decades, within
                 telecom networks the subscriber data management based
                 on DDBMS technology is a novel addition to a service
                 provider's infrastructure. Service providers are used
                 to telecom networks that are efficient, reliable and
                 easy to maintain and operate, in part thanks to the
                 node model used in designing such networks. A DDBMS
                 spanning a large geographical area however incurs into
                 distributed systems issues not previously seen in
                 telecom networks. Identifying and delivering the right
                 set of trade-offs that satisfies the service providers'
                 needs while staying within the known physical bounds of
                 a distributed system is therefore crucial if DDBMS are
                 to conquer the subscriber management space within
                 telecom networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: distributed join strategies in
%% Microsoft's Scope clusters.
@Article{Bruno:2014:AJS,
  author =       "Nicolas Bruno and YongChul Kwon and Ming-Chuan Wu",
  title =        "Advanced join strategies for large-scale distributed
                 computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1484--1495",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Companies providing cloud-scale data services have
                 increasing needs to store and analyze massive data sets
                 (e.g., search logs, click streams, and web graph data).
                 For cost and performance reasons, processing is
                 typically done on large clusters of thousands of
                 commodity machines by using high level scripting
                 languages. In the recent past, there has been
                 significant progress in adapting well-known techniques
                 from traditional relational DBMSs to this new scenario.
                 However, important challenges remain open. In this
                 paper we study the very common join operation, discuss
                 some unique challenges in the large-scale distributed
                 scenario, and explain how to efficiently and robustly
                 process joins in a distributed way. Specifically, we
                 introduce novel execution strategies that leverage
                 opportunities not available in centralized scenarios,
                 and others that robustly handle data skew. We report
                 experimental validations of our approaches on Scope
                 production clusters, which power the Applications and
                 Services Group at Microsoft.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: DGFIndex, a grid-file-based
%% multidimensional range index for Hive in smart-grid workloads.
@Article{Liu:2014:DSG,
  author =       "Yue Liu and Songlin Hu and Tilmann Rabl and Wantao Liu
                 and Hans-Arno Jacobsen and Kaifeng Wu and Jian Chen and
                 Jintao Li",
  title =        "{DGFIndex} for smart grid: enhancing {Hive} with a
                 cost-effective multidimensional range index",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1496--1507",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In Smart Grid applications, as the number of deployed
                 electric smart meters increases, massive amounts of
                 valuable meter data is generated and collected every
                 day. To enable reliable data collection and make
                 business decisions fast, high throughput storage and
                 high-performance analysis of massive meter data become
                 crucial for grid companies. Considering the advantage
                 of high efficiency, fault tolerance, and
                 price-performance of Hadoop and Hive systems, they are
                 frequently deployed as underlying platform for big data
                 processing. However, in real business use cases, these
                 data analysis applications typically involve
                 multidimensional range queries (MDRQ) as well as batch
                 reading and statistics on the meter data. While Hive is
                 high-performance at complex data batch reading and
                 analysis, it lacks efficient indexing techniques for
                 MDRQ. In this paper, we propose DGFIndex, an index
                 structure for Hive that efficiently supports MDRQ for
                 massive meter data. DGFIndex divides the data space
                 into cubes using the grid file technique. Unlike the
                 existing indexes in Hive, which stores all combinations
                 of multiple dimensions, DGFIndex only stores the
                 information of cubes. This leads to smaller index size
                 and faster query processing. Furthermore, with
                 pre-computing user-defined aggregations of each cube,
                 DGFIndex only needs to access the boundary region for
                 aggregation query. Our comprehensive experiments show
                 that DGFIndex can save significant disk space in
                 comparison with the existing indexes in Hive and the
                 query performance with DGFIndex is 2-50 times faster
                 than existing indexes in Hive and HadoopDB for
                 aggregation query, 2-5 times faster than both for
                 non-aggregation query, 2-75 times faster than scanning
                 the whole table in different query selectivity.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 7(13), industrial track: error-bounded stratified sampling for
%% aggregation queries over sparse data (Microsoft SCOPE context).
@Article{Yan:2014:EBS,
  author =       "Ying Yan and Liang Jeff Chen and Zheng Zhang",
  title =        "Error-bounded sampling for analytics on big sparse
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1508--1519",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Aggregation queries are at the core of business
                 intelligence and data analytics. In the big data era,
                 many scalable shared-nothing systems have been
                 developed to process aggregation queries over massive
                 amount of data. Microsoft's SCOPE is a well-known
                 instance in this category. Nevertheless, aggregation
                 queries are still expensive, because query processing
                 needs to consume the entire data set, which is often
                 hundreds of terabytes. Data sampling is a technique
                 that samples a small portion of data to process and
                 returns an approximate result with an error bound,
                 thereby reducing the query's execution time. While
                 similar problems were studied in the database
                 literature, we encountered new challenges that disable
                 most of prior efforts: (1) error bounds are dictated by
                 end users and cannot be compromised, (2) data is
                 sparse, meaning data has a limited population but a
                 wide range. For such cases, conventional uniform
                 sampling often yield high sampling rates and thus
                 deliver limited or no performance gains. In this paper,
                 we propose error-bounded stratified sampling to reduce
                 sample size. The technique relies on the insight that
                 we may only reduce the sampling rate with the knowledge
                 of data distributions. The technique has been
                 implemented into Microsoft internal search query
                 platform. Results show that the proposed approach can
                 reduce up to 99\% sample size comparing with uniform
                 sampling, and its performance is robust against data
                 volume and other key performance metrics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gankidi:2014:IHD,
  author =       "Vinitha Reddy Gankidi and Nikhil Teletia and Jignesh
                 M. Patel and Alan Halverson and David J. DeWitt",
  title =        "Indexing {HDFS} data in {PDW}: splitting the data from
                 the index",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1520--1528",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is a growing interest in making relational DBMSs
                 work synergistically with MapReduce systems. However,
                 there are interesting technical challenges associated
                 with figuring out the right balance between the use and
                 co-deployment of these systems. This paper focuses on
                 one specific aspect of this balance, namely how to
                 leverage the superior indexing and query processing
                 power of a relational DBMS for data that is often more
                 cost-effectively stored in Hadoop/HDFS. We present a
                 method to use conventional B+-tree indices in an RDBMS
                 for data stored in HDFS and demonstrate that our
                 approach is especially effective for highly selective
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2014:CLS,
  author =       "Chong Sun and Narasimhan Rampalli and Frank Yang and
                 AnHai Doan",
  title =        "{Chimera}: large-scale classification using machine
                 learning, rules, and crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1529--1540",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale classification is an increasingly critical
                 Big Data problem. So far, however, very little has been
                 published on how this is done in practice. In this
                 paper we describe Chimera, our solution to classify
                 tens of millions of products into 5000+ product types
                 at WalmartLabs. We show that at this scale, many
                 conventional assumptions regarding learning and
                 crowdsourcing break down, and that existing solutions
                 cease to work. We describe how Chimera employs a
                 combination of learning, rules (created by in-house
                 analysts), and crowdsourcing to achieve accurate,
                 continuously improving, and cost-effective
                 classification. We discuss a set of lessons learned for
                 other similar Big Data systems. In particular, we argue
                 that at large scales crowdsourcing is critical, but
                 must be used in combination with learning, rules, and
                 in-house analysts. We also argue that using rules (in
                 conjunction with learning) is a must, and that more
                 research attention should be paid to helping analysts
                 create and manage (tens of thousands of) rules more
                 effectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bonifati:2014:IJQ,
  author =       "Angela Bonifati and Radu Ciucanu and S{\l}awek
                 Staworko",
  title =        "Interactive join query inference with {JIM}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1541--1544",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Specifying join predicates may become a cumbersome
                 task in many situations e.g., when the relations to be
                 joined come from disparate data sources, when the
                 values of the attributes carry little or no knowledge
                 of metadata, or simply when the user is unfamiliar with
                 querying formalisms. Such task is recurrent in many
                 traditional data management applications, such as data
                 integration, constraint inference, and database
                 denormalization, but it is also becoming pivotal in
                 novel crowdsourcing applications. We present Jim (Join
                 Inference Machine), a system for interactive join
                 specification tasks, where the user infers an $n$-ary
                 join predicate by selecting tuples that are part of the
                 join result via Boolean membership queries. The user
                 can label tuples as positive or negative, while the
                 system allows to identify and gray out the
                 uninformative tuples i.e., those that do not add any
                 information to the final learning goal. The tool also
                 guides the user to reach her join inference goal with a
                 minimal number of interactions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zheng:2014:MMS,
  author =       "Yuxin Zheng and Zhifeng Bao and Lidan Shou and Anthony
                 K. H. Tung",
  title =        "{MESA}: a map service to support fuzzy type-ahead
                 search over geo-textual data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1545--1548",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Geo-textual data are ubiquitous these days. Recent
                 study on spatial keyword search focused on the
                 processing of queries which retrieve objects that match
                 certain keywords within a spatial region. To ensure
                 effective data retrieval, various extensions were done
                 including the tolerance of errors in keyword matching
                 and the search-as-you-type feature using prefix
                 matching. We present MESA, a map application to support
                 different variants of spatial keyword query. In this
                 demonstration, we adopt the autocompletion paradigm
                 that generates the initial query as a prefix matching
                 query. If there are few matching results, other
                 variants are performed as a form of relaxation that
                 reuses the processing done in earlier phases. The types
                 of relaxation allowed include spatial region expansion
                 and exact/approximate prefix/substring matching. MESA
                 adopts the client-server architecture. It provides
                 fuzzy type-ahead search over geo-textual data. The core
                 of MESA is to adopt a unifying search strategy, which
                 incrementally applies the relaxation in an appropriate
                 order to maximize the efficiency of query processing.
                 In addition, MESA equips a user-friendly interface to
                 interact with users and visualize results. MESA also
                 provides customized search to meet the needs of
                 different users.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2014:RRT,
  author =       "Henan Wang and Guoliang Li and Huiqi Hu and Shuo Chen
                 and Bingwen Shen and Hao Wu and Wen-Syan Li and
                 Kian-Lee Tan",
  title =        "{R3}: a real-time route recommendation system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1549--1552",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Existing route recommendation systems have two main
                 weaknesses. First, they usually recommend the same
                 route for all users and cannot help control traffic
                 jam. Second, they do not take full advantage of
                 real-time traffic to recommend the best routes. To
                 address these two problems, we develop a real-time
                 route recommendation system, called R3, aiming to
                 provide users with the real-time-traffic-aware routes.
                 R3 recommends diverse routes for different users to
                 alleviate the traffic pressure. R3 utilizes historical
                 taxi driving data and real-time traffic data and
                 integrates them together to provide users with
                 real-time route recommendation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Benedikt:2014:PPD,
  author =       "Michael Benedikt and Julien Leblay and Efthymia
                 Tsamoura",
  title =        "{PDQ}: proof-driven query answering over {Web}-based
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1553--1556",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The data needed to answer queries is often available
                 through Web-based APIs. Indeed, for a given query there
                 may be many Web-based sources which can be used to
                 answer it, with the sources overlapping in their
                 vocabularies, and differing in their access
                 restrictions (required arguments) and cost. We
                 introduce PDQ (Proof-Driven Query Answering), a system
                 for determining a query plan in the presence of
                 web-based sources. It is: (i) constraint-aware ---
                 exploiting relationships between sources to rewrite an
                 expensive query into a cheaper one, (ii) access-aware
                 --- abiding by any access restrictions known in the
                 sources, and (iii) cost-aware --- making use of any
                 cost information that is available about services. PDQ
                 takes the novel approach of generating query plans from
                 proofs that a query is answerable. We demonstrate the
                 use of PDQ and its effectiveness in generating low-cost
                 plans.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hassan:2014:DFA,
  author =       "Naeemul Hassan and Afroza Sultana and You Wu and
                 Gensheng Zhang and Chengkai Li and Jun Yang and Cong
                 Yu",
  title =        "Data in, fact out: automated monitoring of facts by
                 {FactWatcher}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1557--1560",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Towards computational journalism, we present
                 FactWatcher, a system that helps journalists identify
                 data-backed, attention-seizing facts which serve as
                 leads to news stories. FactWatcher discovers three
                 types of facts, including situational facts,
                 one-of-the-few facts, and prominent streaks, through a
                 unified suite of data model, algorithm framework, and
                 fact ranking measure. Given an append-only database,
                 upon the arrival of a new tuple, FactWatcher monitors
                 if the tuple triggers any new facts. Its algorithms
                 efficiently search for facts without exhaustively
                 testing all possible ones. Furthermore, FactWatcher
                 provides multiple features in striving for an
                 end-to-end system, including fact ranking,
                 fact-to-statement translation and keyword-based fact
                 search.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yuan:2014:ODA,
  author =       "Mingxuan Yuan and Ke Deng and Jia Zeng and Yanhua Li
                 and Bing Ni and Xiuqiang He and Fei Wang and Wenyuan
                 Dai and Qiang Yang",
  title =        "{OceanST}: a distributed analytic system for
                 large-scale spatiotemporal mobile broadband data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1561--1564",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increasing prevalence of versatile mobile
                 devices and the fast deployment of broadband mobile
                 networks, a huge volume of Mobile Broadband (MBB) data
                 has been generated over time. The MBB data naturally
                 contain rich information of a large number of mobile
                 users, covering a considerable fraction of whole
                 population nowadays, including the mobile applications
                 they are using at different locations and time; the MBB
                 data may present the unprecedentedly large knowledge
                 base of human behavior which has highly recognized
                 commercial and social value. However, the storage,
                 management and analysis of the huge and fast growing
                 volume of MBB data post new and significant challenges
                 to the industrial practitioners and research community.
                 In this demonstration, we present a new, MBB data
                 tailored, distributed analytic system named OceanST
                 which has addressed a series of problems and weaknesses
                 of the existing systems, originally designed for more
                 general purpose and capable to handle MBB data to some
                 extent. OceanST is featured by (i) efficiently loading
                 of ever-growing MBB data, (ii) a bunch of
                 spatiotemporal aggregate queries and basic analysis
                 APIs frequently found in various MBB data application
                 scenarios, and (iii) sampling-based approximate
                 solution with provable accuracy bound to cope with huge
                 volume of MBB data. The demonstration will show the
                 advantage of OceanST in a cluster of 5 machines using
                 3TB data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Geerts:2014:TAF,
  author =       "Floris Geerts and Giansalvatore Mecca and Paolo
                 Papotti and Donatello Santoro",
  title =        "That's all folks!: {Llunatic} goes open source",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1565--1568",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "It is widely recognized that whenever different data
                 sources need to be integrated into a single target
                 database errors and inconsistencies may arise, so that
                 there is a strong need to apply data-cleaning
                 techniques to repair the data. Despite this need,
                 database research has so far investigated mappings and
                 data repairing essentially in isolation. Unfortunately,
                 schema-mappings and data quality rules interact with
                 each other, so that applying existing algorithms in a
                 pipelined way --- i.e., first exchange then data, then
                 repair the result --- does not lead to solutions even
                 in simple settings. We present the Llunatic mapping and
                 cleaning system, the first comprehensive proposal to
                 handle schema mappings and data repairing in a uniform
                 way. Llunatic is based on the intuition that
                 transforming and cleaning data are different facets of
                 the same problem, unified by their declarative nature.
                 This holistic approach allows us to incorporate unique
                 features into the system, such as configurable user
                 interaction and a tunable trade-off between efficiency
                 and quality of the solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2014:HMA,
  author =       "Weimo Liu and Saad Bin Suhaim and Saravanan
                 Thirumuruganathan and Nan Zhang and Gautam Das and Ali
                 Jaoua",
  title =        "{HDBTracker}: monitoring the aggregates on dynamic
                 hidden web databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1569--1572",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Numerous web databases, e.g., amazon.com, eBay.com,
                 are ``hidden'' behind (i.e., accessible only through)
                 their restrictive search and browsing interfaces. This
                 demonstration showcases HDBTracker, a web-based system
                 that reveals and tracks (the changes of) user-specified
                 aggregate queries over such hidden web databases,
                 especially those that are frequently updated, by
                 issuing a small number of search queries through the
                 public web interfaces of these databases. The ability
                 to track and monitor aggregates has applications over a
                 wide variety of domains --- e.g., government agencies
                 can track COUNT of openings at online job hunting
                 websites to understand key economic indicators, while
                 businesses can track the AVG price of a product over a
                 basket of e-commerce websites to understand the
                 competitive landscape and/or material costs. A key
                 technique used in HDBTracker is RS-ESTIMATOR, the first
                 algorithm that can efficiently monitor changes to
                 aggregate query answers over a hidden web database.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xia:2014:BBA,
  author =       "Fan Xia and Ye Li and Chengcheng Yu and Haixin Ma and
                 Weining Qian",
  title =        "{BSMA}: a benchmark for analytical queries over social
                 media data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1573--1576",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The demonstration of a benchmark, named as BSMA, for
                 Benchmarking Social Media Analytics, is introduced in
                 this paper. BSMA is designed to benchmark data
                 management systems supporting analytical queries over
                 social media. It is different to existing benchmarks in
                 that: (1) Both real-life data and a synthetic data
                 generator are provided. The real-life dataset contains
                 a social network of 1.6 million users, and all their
                 tweeting and retweeting activities. The data generator
                 can generate both social networks and synthetic
                 timelines that follow data distributions determined by
                 predefined parameters. (2) A set of workloads are
                 provided. The data generator is in responsible for
                 producing updates. A workload generator produces
                 queries based on predefined query templates by
                 generating query arguments online. BSMA workloads cover
                 a large amount of queries with graph operations,
                 temporal queries, hotspot queries, and aggregate
                 queries. Furthermore, the argument generator is capable
                 of sampling data items in the timeline following
                 power-law distribution online. (3) A toolkit is
                 provided to measure and report the performance of
                 systems that implement the benchmark. Furthermore, a
                 prototype system based on dataset and workloads of BSMA
                 is also implemented. The demonstration will include two
                 parts, i.e. the internals of data and workload
                 generator, as well as the performance testing of
                 reference implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Petermann:2014:GBD,
  author =       "Andr{\'e} Petermann and Martin Junghanns and Robert
                 M{\"u}ller and Erhard Rahm",
  title =        "Graph-based data integration and business intelligence
                 with {BIIIG}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1577--1580",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate BIIIG (Business Intelligence with
                 Integrated Instance Graphs), a new system for
                 graph-based data integration and analysis. It aims at
                 improving business analytics compared to traditional
                 OLAP approaches by comprehensively tracking
                 relationships between entities and making them
                 available for analysis. BIIIG supports a largely
                 automatic data integration pipeline for metadata and
                 instance data. Metadata from heterogeneous sources are
                 integrated in a so-called Unified Metadata Graph (UMG)
                 while instance data is combined in a single integrated
                 instance graph (IIG). A unique feature of BIIIG is the
                 concept of business transaction graphs, which are
                 derived from the IIG and which reflect all steps
                 involved in a specific business process. Queries and
                 analysis tasks can refer to the entire instance graph
                 or sets of business transaction graphs. In the
                 demonstration, we perform all data integration steps
                 and present analytic queries including pattern matching
                 and graph-based aggregation of business measures.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vartak:2014:SAG,
  author =       "Manasi Vartak and Samuel Madden and Aditya
                 Parameswaran and Neoklis Polyzotis",
  title =        "{SeeDB}: automatically generating query
                 visualizations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1581--1584",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data analysts operating on large volumes of data often
                 rely on visualizations to interpret the results of
                 queries. However, finding the right visualization for a
                 query is a laborious and time-consuming task. We
                 demonstrate SeeDB, a system that partially automates
                 this task: given a query, SeeDB explores the space of
                 all possible visualizations, and automatically
                 identifies and recommends to the analyst those
                 visualizations it finds to be most ``interesting'' or
                 ``useful''. In our demonstration, conference attendees
                 will see SeeDB in action for a variety of queries on
                 multiple real-world datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dutt:2014:QEA,
  author =       "Anshuman Dutt and Sumit Neelam and Jayant R. Haritsa",
  title =        "{QUEST}: an exploratory approach to robust query
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1585--1588",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lei:2014:RIR,
  author =       "Chuan Lei and Zhongfang Zhuang and Elke A.
                 Rundensteiner and Mohamed Y. Eltabakh",
  title =        "Redoop infrastructure for recurring big data queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1589--1592",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This demonstration presents the Redoop infrastructure,
                 the first full-fledged MapReduce framework with native
                 support for recurring big data queries. Recurring
                 queries, repeatedly being executed for long periods of
                 time over evolving high-volume data, have become a
                 bedrock component in most large-scale data analytic
                 applications. Redoop is a comprehensive extension to
                 Hadoop that pushes the support and optimization of
                 recurring queries into Hadoop's core functionality.
                 While backward compatible with regular MapReduce jobs,
                 Redoop achieves an order of magnitude better
                 performance than Hadoop for recurring workloads. Redoop
                 employs innovative window-aware optimization techniques
                 for such recurring workloads including adaptive
                 window-aware data partitioning, cache-aware task
                 scheduling, and inter-window caching mechanisms. We
                 will demonstrate Redoop's capabilities on a compute
                 cluster against real life workloads including
                 click-stream and sensor data analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Brucato:2014:PTP,
  author =       "Matteo Brucato and Rahul Ramakrishna and Azza Abouzied
                 and Alexandra Meliou",
  title =        "{PackageBuilder}: from tuples to packages",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1593--1596",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demo, we present PackageBuilder, a system that
                 extends database systems to support package queries. A
                 package is a collection of tuples that individually
                 satisfy base constraints and collectively satisfy
                 global constraints. The need for package support arises
                 in a variety of scenarios: For example, in the creation
                 of meal plans, users are not only interested in the
                 nutritional content of individual meals (base
                 constraints), but also care to specify daily
                 consumption limits and control the balance of the
                 entire plan (global constraints). We introduce PaQL, a
                 declarative SQL-based package query language, and the
                 interface abstractions which allow users to
                 interactively specify package queries and easily
                 navigate through their results. To efficiently evaluate
                 queries, the system employs pruning and heuristics, as
                 well as state-of-the-art constraint optimization
                 solvers. We demonstrate PackageBuilder by allowing
                 attendees to interact with the system's interface, to
                 define PaQL queries and to observe how query evaluation
                 is performed.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amsterdamer:2014:OAC,
  author =       "Yael Amsterdamer and Susan B. Davidson and Tova Milo
                 and Slava Novgorodov and Amit Somech",
  title =        "Ontology assisted crowd mining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1597--1600",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present OASSIS (for Ontology ASSISted crowd
                 mining), a prototype system which allows users to
                 declaratively specify their information needs, and
                 mines the crowd for answers. The answers that the
                 system computes are concise and relevant, and represent
                 frequent, significant data patterns. The system is
                 based on (1) a generic model that captures both
                 ontological knowledge, as well as the individual
                 knowledge of crowd members from which frequent patterns
                 are mined; (2) a query language in which users can
                 specify their information needs and types of data
                 patterns they seek; and (3) an efficient query
                 evaluation algorithm, for mining semantically concise
                 answers while minimizing the number of questions posed
                 to the crowd. We will demonstrate OASSIS using a couple
                 of real-life scenarios, showing how users can formulate
                 and execute queries through the OASSIS UI and how the
                 relevant data is mined from the crowd.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2014:SSE,
  author =       "Lisi Chen and Yan Cui and Gao Cong and Xin Cao",
  title =        "{SOPS}: a system for efficient processing of
                 spatial-keyword publish\slash subscribe",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1601--1604",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Massive amount of data that are geo-tagged and
                 associated with text information are being generated at
                 an unprecedented scale. These geo-textual data cover a
                 wide range of topics. Users are interested in receiving
                 up-to-date geo-textual objects (e.g., geo-tagged
                 Tweets) such that their locations meet users' need and
                 their texts are interesting to users. For example, a
                 user may want to be updated with tweets near her home
                  on the topic ``dengue fever headache''. In this
                 demonstration, we present SOPS, the Spatial-Keyword
                 Publish/Subscribe System, that is capable of
                 efficiently processing spatial keyword continuous
                 queries. SOPS supports two types of queries: (1)
                 Boolean Range Continuous (BRC) query that can be used
                 to subscribe the geo-textual objects satisfying a
                 boolean keyword expression and falling in a specified
                 spatial region; (2) Temporal Spatial-Keyword Top-$k$
                 Continuous (TaSK) query that continuously maintains
                 up-to-date top-$k$ most relevant results over a stream
                 of geo-textual objects. SOPS enables users to formulate
                 their queries and view the real-time results over a
                 stream of geo-textual objects by browser-based user
                 interfaces. On the server side, we propose solutions to
                 efficiently processing a large number of BRC queries
                 (tens of millions) and TaSK queries over a stream of
                 geo-textual objects.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shirakawa:2014:MLI,
  author =       "Masumi Shirakawa and Takahiro Hara and Shojiro
                 Nishio",
  title =        "{MLJ}: language-independent real-time search of tweets
                 reported by media outlets and journalists",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1605--1608",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration, we introduce MLJ (MultiLingual
                 Journalism, http://mljournalism.com), a first Web-based
                 system that enables users to search any topic of latest
                 tweets posted by media outlets and journalists beyond
                 languages. Handling multilingual tweets in real time
                 involves many technical challenges: language barrier,
                 sparsity of words, and real-time data stream. To
                 overcome the language barrier and the sparsity of
                 words, MLJ harnesses CL-ESA, a Wikipedia-based
                 language-independent method to generate a vector of
                 Wikipedia pages (entities) from an input text. To
                 continuously deal with tweet stream, we propose
                 one-pass DP-means, an online clustering method based on
                 DP-means. Given a new tweet as an input, MLJ generates
                 a vector using CL-ESA and classifies it into one of
                 clusters using one-pass DP-means. By interpreting a
                 search query as a vector, users can instantly search
                 clusters containing latest related tweets from the
                 query without being aware of language differences. MLJ
                 as of March 2014 supports nine languages including
                 English, Japanese, Korean, Spanish, Portuguese, German,
                 French, Italian, and Arabic covering 24 countries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bress:2014:OHO,
  author =       "Sebastian Bre{\ss} and Bastian K{\"o}cher and Max
                 Heimel and Volker Markl and Michael Saecker and Gunter
                 Saake",
  title =        "{Ocelot\slash HyPE}: optimized data processing on
                 heterogeneous hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1609--1612",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The past years saw the emergence of highly
                 heterogeneous server architectures that feature
                 multiple accelerators in addition to the main
                 processor. Efficiently exploiting these systems for
                 data processing is a challenging research problem that
                 comprises many facets, including how to find an optimal
                 operator placement strategy, how to estimate runtime
                 costs across different hardware architectures, and how
                 to manage the code and maintenance blowup caused by
                 having to support multiple architectures. In prior
                 work, we already discussed solutions to some of these
                 problems: First, we showed that specifying operators in
                 a hardware-oblivious way can prevent code blowup while
                 still maintaining competitive performance when
                 supporting multiple architectures. Second, we presented
                 learning cost functions and several heuristics to
                 efficiently place operators across all available
                 devices. In this demonstration, we provide further
                 insights into this line of work by presenting our
                 combined system Ocelot/HyPE. Our system integrates a
                 hardware-oblivious data processing engine with a
                 learning query optimizer for placement decisions,
                 resulting in a highly adaptive DBMS that is
                 specifically tailored towards heterogeneous hardware
                 environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2014:MMO,
  author =       "Fei Wu and Tobias Kin Hou Lei and Zhenhui Li and
                 Jiawei Han",
  title =        "{MoveMine 2.0}: mining object relationships from
                 movement data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1613--1616",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The development in positioning technology has enabled
                 us to collect a huge amount of movement data from
                 moving objects, such as human, animals, and vehicles.
                 The data embed rich information about the relationships
                 among moving objects and have applications in many
                 fields, e.g., in ecological study and human behavioral
                 study. Previously, we have proposed a system MoveMine
                 that integrates several start-of-art movement mining
                 methods. However, it does not include recent methods on
                 relationship pattern mining. Thus, we propose to extend
                 MoveMine to MoveMine 2.0 by adding substantial new
                 methods in mining dynamic relationship patterns. Newly
                 added methods focus on two types of pairwise
                 relationship patterns: (i) attraction/avoidance
                 relationship, and (ii) following pattern. A
                 user-friendly interface is designed to support
                 interactive exploration of the result and provides
                 flexibility in tuning parameters. MoveMine 2.0 is
                 tested on multiple types of real datasets to ensure its
                 practical use. Our system provides useful tools for
                 domain experts to gain insights on real dataset.
                 Meanwhile, it will promote further research in
                 relationship mining from moving objects.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2014:PFA,
  author =       "Liwen Sun and Sanjay Krishnan and Reynold S. Xin and
                 Michael J. Franklin",
  title =        "A partitioning framework for aggressive data
                 skipping",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1617--1620",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose to demonstrate a fine-grained partitioning
                 framework that reorganizes the data tuples into small
                 blocks at data loading time. The goal is to enable
                 queries to maximally skip scanning data blocks. The
                 partition framework consists of four steps: (1)
                 workload analysis, which extracts features from a query
                 workload, (2) augmentation, which augments each data
                 tuple with a feature vector, (3) reduce, which
                 succinctly represents a set of data tuples using a set
                 of feature vectors, and (4) partitioning, which
                 performs a clustering algorithm to partition the
                 feature vectors and uses the clustering result to guide
                 the actual data partitioning. Our experiments show that
                 our techniques result in a 3-7x query response time
                 improvement over traditional range partitioning due to
                 more effective data skipping.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2014:IOE,
  author =       "Lei Cao and Qingyang Wang and Elke A. Rundensteiner",
  title =        "Interactive outlier exploration in big data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1621--1624",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate our VSOutlier system for supporting
                 interactive exploration of outliers in big data
                 streams. VSOutlier not only supports a rich variety of
                 outlier types supported by innovative and efficient
                 outlier detection strategies, but also provides a rich
                 set of interactive interfaces to explore outliers in
                 real time. Using the stock transactions dataset from
                 the US stock market and the moving objects dataset from
                 MITRE, we demonstrate that the VSOutlier system enables
                 analysts to more efficiently identify, understand, and
                 respond to phenomena of interest in near real-time even
                 when applied to high volume streams.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{To:2014:SAE,
  author =       "Quoc-Cuong To and Benjamin Nguyen and Philippe
                 Pucheral",
  title =        "{SQL\slash AA}: executing {SQL} on an asymmetric
                 architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1625--1628",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Current applications, from complex sensor systems
                 (e.g. quantified self) to online e-markets acquire vast
                 quantities of personal information which usually end-up
                 on central servers. This information represents an
                 unprecedented potential for user customized
                 applications and business (e.g., car insurance billing,
                 carbon tax, traffic decongestion, resource optimization
                 in smart grids, healthcare surveillance, participatory
                 sensing). However, the PRISM affair has shown that
                 public opinion is starting to wonder whether these new
                 services are not bringing us closer to science fiction
                 dystopias. It has become clear that centralizing and
                 processing all one's data on a single server is a major
                 problem with regards to privacy concerns. Conversely,
                 decentralized architectures, devised to help
                 individuals keep full control of their data, complexify
                 global treatments and queries, often impeding the
                 development of innovative services and applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2014:GGS,
  author =       "Zhao Chen and Rui Fu and Ziyuan Zhao and Zheng Liu and
                 Leihao Xia and Lei Chen and Peng Cheng and Caleb Chen
                 Cao and Yongxin Tong and Chen Jason Zhang",
  title =        "{gMission}: a general spatial crowdsourcing platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1629--1632",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As one of the successful forms of using Wisdom of
                 Crowd, crowdsourcing, has been widely used for many
                 human intrinsic tasks, such as image labeling, natural
                 language understanding, market predication and opinion
                 mining. Meanwhile, with advances in pervasive
                 technology, mobile devices, such as mobile phones and
                 tablets, have become extremely popular. These mobile
                 devices can work as sensors to collect multimedia
                 data(audios, images and videos) and location
                 information. This power makes it possible to implement
                 the new crowdsourcing mode: spatial crowdsourcing. In
                 spatial crowdsourcing, a requester can ask for
                 resources related a specific location, the mobile users
                 who would like to take the task will travel to that
                 place and get the data. Due to the rapid growth of
                 mobile device uses, spatial crowdsourcing is likely to
                 become more popular than general crowdsourcing, such as
                 Amazon Turk and Crowdflower. However, to implement such
                 a platform, effective and efficient solutions for
                 worker incentives, task assignment, result aggregation
                 and data quality control must be developed. In this
                 demo, we will introduce gMission, a general spatial
                 crowdsourcing platform, which features with a
                 collection of novel techniques, including geographic
                 sensing, worker detection, and task recommendation. We
                 introduce the sketch of system architecture and
                 illustrate scenarios via several case analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cetintemel:2014:SSN,
  author =       "Ugur Cetintemel and Jiang Du and Tim Kraska and Samuel
                 Madden and David Maier and John Meehan and Andrew Pavlo
                 and Michael Stonebraker and Erik Sutherland and Nesime
                 Tatbul and Kristin Tufte and Hao Wang and Stanley
                 Zdonik",
  title =        "{S-Store}: a streaming {NewSQL} system for big
                 velocity applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1633--1636",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "First-generation streaming systems did not pay much
                 attention to state management via ACID transactions
                 (e.g., [3, 4]). S-Store is a data management system
                 that combines OLTP transactions with stream processing.
                 To create S-Store, we begin with H-Store, a main-memory
                 transaction processing engine, and add primitives to
                 support streaming. This includes triggers and
                 transaction workflows to implement push-based
                 processing, windows to provide a way to bound the
                 computation, and tables with hidden state to implement
                 scoping for proper isolation. This demo explores the
                 benefits of this approach by showing how a na{\"\i}ve
                 implementation of our benchmarks using only H-Store can
                 yield incorrect results. We also show that by
                 exploiting push-based semantics and our implementation
                 of triggers, we can achieve significant improvement in
                 transaction throughput. We demo two modern
                 applications: (i) leaderboard maintenance for a version
                 of ``American Idol'', and (ii) a city-scale bicycle
                 rental scenario.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xie:2014:CRT,
  author =       "Runquan Xie and Feida Zhu and Hui Ma and Wei Xie and
                 Chen Lin",
  title =        "{CLEar}: a real-time online observatory for bursty and
                 viral events",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1637--1640",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We describe our demonstration of CLEar (CLairaudient
                 Ear), a real-time online platform for detecting,
                 monitoring, summarizing, contextualizing and
                 visualizing bursty and viral events, those triggering a
                 sudden surge of public interest and going viral on
                 micro-blogging platforms. This task is challenging for
                 existing methods as they either use complicated topic
                 models to analyze topics in a off-line manner or define
                 temporal structure of fixed granularity on the data
                 stream for online topic learning, leaving them hardly
                 scalable for real-time stream like that of Twitter. In
                 this demonstration of CLEar, we present a three-stage
                 system: First, we show a real-time bursty event
                 detection module based on a data-sketch topic model
                 which makes use of acceleration of certain stream
                 quantities as the indicators of topic burstiness to
                 trigger efficient topic inference. Second, we
                 demonstrate popularity prediction for the detected
                 bursty topics and event summarization based on
                 clustering related topics detected in successive time
                 periods. Third, we illustrate CLEar's module for
                 contextualizing and visualizing the event evolution
                 both along time-line and across other news media to
                 offer an easier understanding of the events.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Suh:2014:ALI,
  author =       "Young-Kyoon Suh and Richard T. Snodgrass and Rui
                 Zhang",
  title =        "{AZDBLab}: a laboratory information system for
                 large-scale empirical {DBMS} studies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1641--1644",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the database field, while very strong mathematical
                 and engineering work has been done, the scientific
                 approach has been much less prominent. The deep
                 understanding of query optimizers obtained through the
                 scientific approach can lead to better engineered
                 designs. Unlike other domains, there have been few
                 DBMS-dedicated laboratories, focusing on such
                 scientific investigation. In this demonstration, we
                 present a novel DBMS-oriented research infrastructure,
                 called Arizona Database Laboratory (AZDBLab), to assist
                 database researchers in conducting a large-scale
                 empirical study across multiple DBMSes. For them to
                 test their hypotheses on the behavior of query
                 optimizers, AZDBLab can run and monitor a large-scale
                 experiment with thousands (or millions) of queries on
                 different DBMSes. Furthermore, AZDBLab can help users
                 automatically analyze these queries. In the demo, the
                 audience will interact with AZDBLab through the
                 stand-alone application and the mobile app to conduct
                 such a large-scale experiment for a study. The audience
                 will then run a Tucson Timing Protocol analysis on the
                 finished experiment and then see the analysis (data
                 sanity check and timing) results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2014:TTM,
  author =       "Qi Wang and Manohar Kaul and Cheng Long and Raymond
                 Chi-Wing Wong",
  title =        "{Terrain-Toolkit}: a multi-functional tool for terrain
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1645--1648",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Terrain data is becoming increasingly popular both in
                 industry and in academia. Many tools have been
                 developed for visualizing terrain data. However, we
                 find that (1) they usually accept very few data formats
                 of terrain data only; (2) they do not support terrain
                 simplification well which, as will be shown, is used
                 heavily for query processing in spatial databases; and
                 (3) they do not provide the surface distance operator
                 which is fundamental for many applications based on
                 terrain data. Motivated by this, we developed a tool
                 called Terrain-Toolkit for terrain data which accepts a
                 comprehensive set of data formats, supports terrain
                 simplification and provides the surface distance
                 operator.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fu:2014:FDC,
  author =       "Yupeng Fu and Kian Win Ong and Yannis Papakonstantinou
                 and Erick Zamora",
  title =        "Forward: data-centric {UIs} using declarative
                 templates that efficiently wrap third-party
                 {JavaScript} components",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1649--1652",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While Ajax programming and the plethora of JavaScript
                 component libraries enable high-quality UIs in web
                 applications, integrating them with page data is
                 laborious and error-prone as a developer has to
                 handcode incremental modifications with trigger-based
                 programming and manual coordination of data
                 dependencies. The FORWARD web framework simplifies the
                 development of Ajax applications through declarative,
                 state-based templates. This declarative, data-centric
                 approach is characterized by the principle of
                 logical/physical independence, which the database
                 community has often deployed successfully. It enables
                 FORWARD to leverage database techniques, such as
                 incremental view maintenance, updatable views,
                 capability-based component wrappers and cost-based
                 optimization to automate efficient live visualizations.
                 We demonstrate an end-to-end system implementation,
                 including a web-based IDE (itself built in FORWARD),
                 academic and commercial applications built in FORWARD
                 and a wide variety of JavaScript components supported
                 by the declarative templates.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lin:2014:SSP,
  author =       "Xika Lin and Abhishek Mukherji and Elke A.
                 Rundensteiner and Matthew O. Ward",
  title =        "{SPIRE}: supporting parameter-driven interactive rule
                 mining and exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1653--1656",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate our SPIRE technology for supporting
                 interactive mining of both positive and negative rules
                 at the speed of thought. It is often misleading to
                 learn only about positive rules, yet extremely
                 revealing to find strongly supported negative rules.
                 Key technical contributions of SPIRE including
                 region-wise abstractions of rules, positive-negative
                 rule relationship analysis, rule redundancy management
                 and rule visualization supporting novel exploratory
                 queries will be showcased. The audience can
                 interactively explore complex rule relationships in a
                 visual manner, such as comparing negative rules with
                 their positive counterparts, that would otherwise take
                 prohibitive time. Overall, our SPIRE system provides
                 data analysts with rich insights into rules and rule
                 relationships while significantly reducing manual
                 effort and time investment required.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Anderson:2014:IDE,
  author =       "Michael R. Anderson and Michael Cafarella and Yixing
                 Jiang and Guan Wang and Bochun Zhang",
  title =        "An integrated development environment for faster
                 feature engineering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1657--1660",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The application of machine learning to large datasets
                 has become a core component of many important and
                 exciting software systems being built today. The
                 extreme value in these trained systems is tempered,
                 however, by the difficulty of constructing them. As
                 shown by the experience of Google, Netflix, IBM, and
                 many others, a critical problem in building trained
                 systems is that of feature engineering. High-quality
                 machine learning features are crucial for the system's
                 performance but are difficult and time-consuming for
                 engineers to develop. Data-centric developer tools that
                 improve the productivity of feature engineers will thus
                 likely have a large impact on an important area of
                 work. We have built a demonstration integrated
                 development environment for feature engineers. It
                 accelerates one particular step in the feature
                 engineering development cycle: evaluating the
                 effectiveness of novel feature code. In particular, it
                 uses an index and runtime execution planner to process
                 raw data objects (e.g., Web pages) in order of
                 descending likelihood that the data object will be
                 relevant to the user's feature code. This demonstration
                 IDE allows the user to write arbitrary feature code,
                 evaluate its impact on learner quality, and observe
                 exactly how much faster our technique performs compared
                 to a baseline system.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xiong:2014:PSD,
  author =       "Pengcheng Xiong and Hakan Hacig{\"u}m{\"u}s",
  title =        "{Pronto}: a software-defined networking based system
                 for performance management of analytical queries on
                 distributed data stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1661--1664",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nowadays data analytics applications are accessing
                 more and more data from distributed data stores,
                 creating large amount of data traffic on the network.
                 Therefore, distributed analytic queries are prone to
                 suffer from bad performance in terms of query execution
                 time when they encounter a network resource contention,
                 which is quite common in a shared network. Typical
                 distributed query optimizers do not have a way to solve
                 this problem because historically they have been
                 treating the network underneath as a black-box: they
                 are unable to monitor it, let alone to control it.
                 However, we are entering a new era of software-defined
                 networking (SDN), which provides visibility into and
                 control of the network's state for the applications
                 including distributed database systems. In this
                 demonstration, we present a system, called Pronto that
                 leverages the SDN capabilities for a distributed query
                 processor to achieve performance improvement and
                 differentiation for analytical queries. The system is
                 the real implementation of our recently developed
                 methods on commercial SDN products. The demonstration
                 shows the shortcomings of a distributed query
                 optimizer, which treats the underlying network as a
                 black box, and the advantages of the SDN-based approach
                 by allowing the users to selectively explore various
                 relevant and interesting settings in a distributed
                 query processing environment.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:GYB,
  author =       "Rui Zhang and Reshu Jain and Prasenjit Sarkar and
                 Lukas Rupprecht",
  title =        "Getting your big data priorities straight: a
                 demonstration of priority-based {QoS} using
                 social-network-driven stock recommendation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1665--1668",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As we come to terms with various big data challenges,
                 one vital issue remains largely untouched. That is the
                 optimal multiplexing and prioritization of different
                 big data applications sharing the same underlying
                 infrastructure, for example, a public cloud platform.
                 Given these demanding applications and the necessary
                 practice to avoid over-provisioning, resource
                 contention between applications is inevitable. Priority
                 must be given to important applications (or sub
                 workloads in an application) in these circumstances.
                 This demo highlights the compelling impact
                 prioritization could make, using an example application
                 that recommends promising combinations of stocks to
                 purchase based on relevant Twitter sentiment. The
                 application consists of a batch job and an interactive
                 query, ran simultaneously. Our underlying solution
                 provides a unique capability to identify and
                 differentiate application workloads throughout a
                 complex big data platform. Its current implementation
                 is based on Apache Hadoop and the IBM GPFS distributed
                 storage system. The demo showcases the superior
                 interactive query performance achievable by
                 prioritizing its workloads and thereby avoiding I/O
                 bandwidth contention. The query time is 3.6 $ \times $
                 better compared to no prioritization. Such a
                 performance is within 0.3\% of that of an idealistic
                 system where the query runs without contention. The
                 demo is conducted on around 3 months of Twitter data,
                 pertinent to the S \& P 100 index, with about 4 $
                 \times $ 10$^{12}$ potential stock combinations
                 considered.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jindal:2014:VYR,
  author =       "Alekh Jindal and Praynaa Rawlani and Eugene Wu and
                 Samuel Madden and Amol Deshpande and Mike Stonebraker",
  title =        "{Vertexica}: your relational friend for graph
                 analytics!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1669--1672",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we present Vertexica, a graph analytics
                 tools on top of a relational database, which is user
                 friendly and yet highly efficient. Instead of
                 constraining programmers to SQL, Vertexica offers a
                 popular vertex-centric query interface, which is more
                 natural for analysts to express many graph queries. The
                 programmers simply provide their vertex-compute
                 functions and Vertexica takes care of efficiently
                 executing them in the standard SQL engine. The
                 advantage of using Vertexica is its ability to leverage
                 the relational features and enable much more
                 sophisticated graph analysis. These include expressing
                 graph algorithms which are difficult in vertex-centric
                 but straightforward in SQL and the ability to compose
                 end-to-end data processing pipelines, including pre-
                 and post- processing of graphs as well as combining
                 multiple algorithms for deeper insights. Vertexica has
                 a graphical user interface and we outline several
                 demonstration scenarios including, interactive graph
                 analysis, complex graph analysis, and continuous and
                 time series analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Quamar:2014:NNC,
  author =       "Abdul Quamar and Amol Deshpande and Jimmy Lin",
  title =        "{NScale}: neighborhood-centric analytics on large
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1673--1676",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is an increasing interest in executing rich and
                 complex analysis tasks over large-scale graphs, many of
                 which require processing and reasoning about a large
                 number of multi-hop neighborhoods or subgraphs in the
                 graph. Examples of such tasks include ego network
                 analysis, motif counting in biological networks,
                 finding social circles, personalized recommendations,
                 link prediction, anomaly detection, analyzing influence
                 cascades, and so on. These tasks are not well served by
                 existing vertex-centric graph processing frameworks
                 whose computation and execution models limit the user
                 program to directly access the state of a single
                 vertex, resulting in high communication, scheduling,
                 and memory overheads in executing such tasks. Further,
                 most existing graph processing frameworks also
                 typically ignore the challenges in extracting the
                 relevant portions of the graph that an analysis task is
                 interested in, and loading it onto distributed memory.
                 In this demonstration proposal, we describe NScale, a
                 novel end-to-end graph processing framework that
                 enables the distributed execution of complex
                 neighborhood-centric analytics over large-scale graphs
                 in the cloud. NScale enables users to write programs at
                 the level of neighborhoods or subgraphs. NScale uses
                 Apache YARN for efficient and fault-tolerant
                 distribution of data and computation; it features GEL,
                 a novel graph extraction and loading phase, that
                 extracts the relevant portions of the graph and loads
                 them into distributed memory using as few machines as
                 possible. NScale utilizes novel techniques for the
                 distributed execution of user computation that minimize
                 memory consumption by exploiting overlap among the
                 neighborhoods of interest. A comprehensive experimental
                 evaluation shows orders-of-magnitude improvements in
                 performance and total cost over vertex-centric
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:DDP,
  author =       "Haoran Li and Li Xiong and Lifan Zhang and Xiaoqian
                 Jiang",
  title =        "{DPSynthesizer}: differentially private data
                 synthesizer for privacy preserving data sharing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1677--1680",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differential privacy has recently emerged in private
                 statistical data release as one of the strongest
                 privacy guarantees. Releasing synthetic data that mimic
                 original data with differential privacy provides a
                 promising way for privacy preserving data sharing and
                 analytics while providing a rigorous privacy guarantee.
                 However, to this date there is no open-source tools
                 that allow users to generate differentially private
                 synthetic data, in particular, for high dimensional and
                 large domain data. Most of the existing techniques that
                 generate differentially private histograms or synthetic
                 data only work well for single dimensional or
                 low-dimensional histograms. They become problematic for
                 high dimensional and large domain data due to increased
                 perturbation error and computation complexity. We
                 propose DPSynthesizer, a toolkit for differentially
                 private data synthesization. The core of DPSynthesizer
                 is DPCopula designed for high-dimensional and
                 large-domain data. DPCopula computes a differentially
                 private copula function from which synthetic data can
                 be sampled. Copula functions are used to describe the
                 dependence between multivariate random vectors and
                 allow us to build the multivariate joint distribution
                 using one-dimensional marginal distributions.
                 DPSynthesizer also implements a set of state-of-the-art
                 methods for building differentially private histograms,
                 suitable for low-dimensional data, from which synthetic
                 data can be generated. We will demonstrate the system
                 using DPCopula as well as other methods with various
                 data sets and show the feasibility, utility, and
                 efficiency of various methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kong:2014:SLS,
  author =       "Longbo Kong and Zhi Liu and Yan Huang",
  title =        "{SPOT}: locating social media users based on social
                 network context",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1681--1684",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A tremendous amount of information is being shared
                 everyday on social media sites such as Facebook,
                 Twitter or Google+. But only a small portion of users
                 provide their location information, which can be
                 helpful in targeted advertisement and many other
                 services. In this demo we present our large scale user
                 location estimation system, SPOT, which showcase
                 different location estimating models on real world data
                 sets. The demo shows three different location
                 estimation algorithms: a friend-based, a social
                 closeness-based, and an energy and local social
                 coefficient based. The first algorithm is a baseline
                 and the other two new algorithms utilize social
                 closeness information which was traditionally treated
                 as a binary friendship. The two algorithms are based on
                 the premise that friends are different and close
                 friends can help to estimate location better. The demo
                 will also show that all three algorithms benefit from a
                 confidence-based iteration method. The demo is
                 web-based. A user can specify different settings,
                 explore the estimation results on a map, and observe
                 the statistical information, e.g. accuracy and average
                 friends used in the estimation, dynamically. The demo
                 provides two datasets: Twitter (148,860 located users)
                 and Gowalla (99,563 located users). Furthermore, a user
                 can filter users with certain features, e.g. with more
                 than 100 friends, to see how the estimating models work
                 on a particular case. The estimated and real locations
                 of those users as well as their friends will be
                 displayed on the map.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alavi:2014:RQE,
  author =       "Zohreh Alavi and Lu Zhou and James Powers and Keke
                 Chen",
  title =        "{RASP-QS}: efficient and confidential query services
                 in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1685--1688",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Hosting data query services in public clouds is an
                 attractive solution for its great scalability and
                 significant cost savings. However, data owners also
                 have concerns on data privacy due to the lost control
                 of the infrastructure. This demonstration shows a
                 prototype for efficient and confidential range/kNN
                 query services built on top of the random space
                 perturbation (RASP) method. The RASP approach provides
                 a privacy guarantee practical to the setting of
                 cloud-based computing, while enabling much faster query
                 processing compared to the encryption-based approach.
                 This demonstration will allow users to more intuitively
                 understand the technical merits of the RASP approach
                 via interactive exploration of the visual interface.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kunjir:2014:TTM,
  author =       "Mayuresh Kunjir and Prajakta Kalmegh and Shivnath
                 Babu",
  title =        "{Thoth}: towards managing a multi-system cluster",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1689--1692",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Following the 'no one size fits all' philosophy,
                 active research in big data platforms is focusing on
                 creating an environment for multiple 'one-size' systems
                 to co-exist and cooperate in the same cluster.
                 Consequently, it has now become imperative to provide
                 an integrated management solution that provides a
                 database-centric view of the underlying multi-system
                 environment. We outline the proposal of DBMS$^+$, a
                 database management platform over multiple 'one-size'
                 systems. Our prototype implementation of DBMS$^+$,
                 called Thoth, adaptively chooses a best-fit system
                 based on application requirements. In this
                 demonstration, we propose to showcase Thoth DM, a data
                 management framework for Thoth which consists of a data
                 collection pipeline utility, data consolidation and
                 dispatcher module, and a warehouse for storing this
                 data. We further introduce the notion of apps; an app
                 is a utility that registers with Thoth DM and
                 interfaces with its warehouse to provide core database
                 management functionalities like dynamic provisioning of
                 resources, designing a multi-system-aware optimizer,
                 tuning of configuration parameters on each system, data
                 storage, and layout schemes. We will demonstrate Thoth
                 DM in action over Hive, Hadoop, Shark, Spark, and the
                 Hadoop Distributed File System. This demonstration will
                 focus on the following apps: (i) Dashboard for
                 administration and control that will let the audience
                 monitor and visualize a database-centric view of the
                 multi-system cluster, and (ii) Data Layout Recommender
                 app will allow searching for the optimal data layout in
                 the multi-system setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:XLC,
  author =       "Lei Zhang and Achim Rettinger",
  title =        "{X-LiSA}: cross-lingual semantic annotation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1693--1696",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ever-increasing quantities of structured knowledge
                 on the Web and the impending need of multilinguality
                 and cross-linguality for information access pose new
                 challenges but at the same time open up new
                 opportunities for knowledge extraction research. In
                 this regard, cross-lingual semantic annotation has
                 emerged as a topic of major interest and it is
                 essential to build tools that can link words and
                 phrases in unstructured text in one language to
                 resources in structured knowledge bases in any other
                 language. In this paper, we demonstrate X-LiSA, an
                 infrastructure for cross-lingual semantic annotation,
                 which supports both service-oriented and user-oriented
                 interfaces for annotating text documents and web pages
                 in different languages using resources from Wikipedia
                 and Linked Open Data (LOD).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jayachandran:2014:CUI,
  author =       "Prasanth Jayachandran and Karthik Tunga and Niranjan
                 Kamat and Arnab Nandi",
  title =        "Combining user interaction, speculative query
                 execution and sampling in the {DICE} system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1697--1700",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The interactive exploration of data cubes has become a
                 popular application, especially over large datasets. In
                 this paper, we present DICE, a combination of a novel
                 frontend query interface and distributed aggregation
                 backend that enables interactive cube exploration. DICE
                 provides a convenient, practical alternative to the
                 typical offline cube materialization strategy by
                 allowing the user to explore facets of the data cube,
                 trading off accuracy for interactive response-times, by
                 sampling the data. We consider the time spent by the
                 user perusing the results of their current query as an
                 opportunity to execute and cache the most likely
                 followup queries. The frontend presents a novel
                 intuitive interface that allows for sampling-aware
                 aggregations, and encourages interaction via our
                 proposed faceted model. The design of our backend is
                 tailored towards the low-latency user interaction at
                 the frontend, and vice-versa. We discuss the
                 synergistic design behind both the frontend user
                 experience and the backend architecture of DICE; and,
                 present a demonstration that allows the user to fluidly
                 interact with billion-tuple datasets within sub-second
                 interactive response times.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Su:2014:SSM,
  author =       "Han Su and Kai Zheng and Kai Zeng and Jiamin Huang and
                 Xiaofang Zhou",
  title =        "{STMaker}: a system to make sense of trajectory data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1701--1704",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Widely adoption of GPS-enabled devices generates large
                 amounts of trajectories every day. The raw trajectory
                 data describes the movement history of moving objects
                 by a sequence of (longitude, latitude, time-stamp)
                 triples, which are nonintuitive for human to perceive
                 the prominent features of the trajectory, such as where
                 and how the moving object travels. In this demo, we
                 present the STMaker system to help users make sense of
                 individual trajectories. Given a trajectory, STMaker
                 can automatically extract the significant semantic
                 behavior of the trajectory, and summarize the behavior
                 by a short human-readable text. In this paper, we first
                 introduce the phrases of generating trajectory
                 summarizations, and then show several real trajectory
                 summarization cases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jugel:2014:FVA,
  author =       "Uwe Jugel and Zbigniew Jerzak and Gregor Hackenbroich
                 and Volker Markl",
  title =        "Faster visual analytics through pixel-perfect
                 aggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1705--1708",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "State-of-the-art visual data analysis tools ignore
                 bandwidth limitations. They fetch millions of records
                 of high-volume time series data from an underlying
                 RDBMS to eventually draw only a few thousand pixels on
                 the screen. In this work, we demonstrate a pixel-aware
                 big data visualization system that dynamically adapts
                 the number of data points transmitted and thus the data
                 rate, while preserving pixel-perfect visualizations. We
                 show how to carefully select the data points to fetch
                 for each pixel of a visualization, using a
                 visualization-driven data aggregation that models the
                 visualization process. Defining all required data
                 reduction operators at the query level, our system
                 trades off a few milliseconds of query execution time
                 for dozens of seconds of data transfer time. The
                 results are significantly reduced response times and a
                 near real-time visualization of millions of data
                 points. Using our pixel-aware system, the audience will
                 be able to enjoy the speed and ease of big data
                 visualizations and learn about the scientific
                 background of our system through an interactive
                 evaluation component, allowing the visitor to measure,
                 visualize, and compare competing visualization-related
                 data reduction techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khan:2014:SBG,
  author =       "Arijit Khan and Sameh Elnikety",
  title =        "Systems for big-graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1709--1710",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs have become increasingly important to represent
                 highly-interconnected structures and schema-less data
                 including the World Wide Web, social networks,
                 knowledge graphs, genome and scientific databases,
                 medical and government records. The massive scale of
                 graph data easily overwhelms the main memory and
                 computation resources on commodity servers. In these
                 cases, achieving low latency and high throughput
                 requires partitioning the graph and processing the
                 graph data in parallel across a cluster of servers.
                 However, the software and hardware advances that have
                 worked well for developing parallel databases and
                 scientific applications are not necessarily effective
                 for big-graph problems. Graph processing poses
                 interesting system challenges: graphs represent
                 relationships which are usually irregular and
                 unstructured; and therefore, the computation and data
                 access patterns have poor locality. Hence, the last few
                 years has seen an unprecedented interest in building
                 systems for big-graphs by various communities including
                 databases, systems, semantic web, machine learning, and
                 operations research. In this tutorial, we discuss the
                 design of the emerging systems for processing of
                 big-graphs, key features of distributed graph
                 algorithms, as well as graph partitioning and workload
                 balancing techniques. We emphasize the current
                 challenges and highlight some future research
                 directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gal:2014:UER,
  author =       "Avigdor Gal",
  title =        "Uncertain entity resolution: re-evaluating entity
                 resolution in the big data era: tutorial",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1711--1712",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity resolution is a fundamental problem in data
                 integration dealing with the combination of data from
                 different sources to a unified view of the data. Entity
                 resolution is inherently an uncertain process because
                 the decision to map a set of records to the same entity
                 cannot be made with certainty unless these are
                 identical in all of their attributes or have a common
                 key. In the light of recent advancement in data
                 accumulation, management, and analytics landscape
                 (known as big data) the tutorial re-evaluates the
                 entity resolution process and in particular looks at
                 best ways to handle data veracity. The tutorial ties
                 entity resolution with recent advances in probabilistic
                 database research, focusing on sources of uncertainty
                 in the entity resolution process.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Suchanek:2014:KBA,
  author =       "Fabian M. Suchanek and Gerhard Weikum",
  title =        "Knowledge bases in the age of big data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1713--1714",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This tutorial gives an overview on state-of-the-art
                 methods for the automatic construction of large
                 knowledge bases and harnessing them for data and text
                 analytics. It covers both big-data methods for building
                 knowledge bases and knowledge bases being assets for
                 big-data applications. The tutorial also points out
                 challenges and research opportunities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Meliou:2014:CED,
  author =       "Alexandra Meliou and Sudeepa Roy and Dan Suciu",
  title =        "Causality and explanations in databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1715--1716",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the surge in the availability of information,
                 there is a great demand for tools that assist users in
                 understanding their data. While today's exploration
                 tools rely mostly on data visualization, users often
                 want to go deeper and understand the underlying causes
                 of a particular observation. This tutorial surveys
                 research on causality and explanation for data-oriented
                 applications. We will review and summarize the research
                 thus far into causality and explanation in the database
                 and AI communities, giving researchers a snapshot of
                 the current state of the art on this topic, and propose
                 a unified framework as well as directions for future
                 research. We will cover both the theory of
                 causality/explanation and some applications; we also
                 discuss the connections with other topics in database
                 research like provenance, deletion propagation, why-not
                 queries, and OLAP techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:ESB,
  author =       "Yunyao Li and Ziyang Liu and Huaiyu Zhu",
  title =        "Enterprise search in the big data era: recent
                 developments and open challenges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1717--1718",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Enterprise search allows users in an enterprise to
                 retrieve desired information through a simple search
                 interface. It is widely viewed as an important
                 productivity tool within an enterprise. While Internet
                 search engines have been highly successful, enterprise
                 search remains notoriously challenging due to a variety
                 of unique challenges, and is being made more so by the
                 increasing heterogeneity and volume of enterprise data.
                 On the other hand, enterprise search also presents
                 opportunities to succeed in ways beyond current
                 Internet search capabilities. This tutorial presents an
                 organized overview of these challenges and
                 opportunities, and reviews the state-of-the-art
                 techniques for building a reliable and high quality
                 enterprise search engine, in the context of the rise of
                 big data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:VPD,
  author =       "Yunyao Li and Erich Neuhold",
  title =        "{VLDB 2014} {Ph.D.} workshop: an overview",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1719--1719",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The VLDB 2014 PhD Workshop is a one-day event to be
                 held in Hangzhou, China on September 1st, 2014, in
                 conjunction with VLDB 2014. The aim of this workshop is
                 to provide helpful feedback, useful information and
                 networking opportunities that can benefit the students'
                 dissertation work as well as their long-term career.
                 The selection process and the workshop program were
                 carefully designed with this specific goal in mind. The
                 accepted submissions are included in the online
                 proceedings for the Workshop at
                 \url{http://www.vldb.org/2014/phd_workshop_proceedings.html}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Venkataraman:2014:DCG,
  author =       "Shivakumar Venkataraman and Divyakant Agrawal",
  title =        "Datacenters as computers: {Google} engineering \&
                 database research perspectives",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1720--1721",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this collaborative keynote address, we will share
                 Google's experience in building a scalable data
                 infrastructure that leverages datacenters for managing
                 Google's advertising data over the last decade. In
                 order to support the massive online advertising
                 platform at Google, the data infrastructure must
                 simultaneously support both transactional and
                 analytical workloads. The focus of this talk will be to
                 highlight how the datacenter architecture and the cloud
                 computing paradigm has enabled us to manage the
                 exponential growth in data volumes and user queries,
                 make our services highly available and fault tolerant
                 to massive datacenter outages, and deliver results with
                 very low latencies. We note that other Internet
                 companies have also undergone similar growth in data
                 volumes and user queries. In fact, this phenomenon has
                 resulted in at least two new terms in the technology
                 lexicon: big data and cloud computing. Cloud computing
                 (and datacenters) have been largely responsible for
                 scaling the data volumes from terabytes range just a
                 few years ago to now reaching in the exabyte range over
                 the next couple of years. Delivering solutions at this
                 scale that are fault-tolerant, latency sensitive, and
                 highly available requires a combination of research
                 advances with engineering ingenuity at Google and
                 elsewhere. Next, we will try to answer the following
                 question: is a datacenter just another (very large)
                 computer? Or, does it fundamentally change the design
                 principles for data-centric applications and systems.
                 We will conclude with some of the unique research
                 challenges that need to be addressed in order to
                 sustain continuous growth in data volumes while
                 supporting high throughput and low latencies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Plattner:2014:ICM,
  author =       "Hasso Plattner",
  title =        "The impact of columnar in-memory databases on
                 enterprise systems: implications of eliminating
                 transaction-maintained aggregates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1722--1729",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Five years ago I proposed a common database approach
                 for transaction processing and analytical systems using
                 a columnar in-memory database, disputing the common
                 belief that column stores are not suitable for
                 transactional workloads. Today, the concept has been
                 widely adopted in academia and industry and it is
                 proven that it is feasible to run analytical queries on
                 large data sets directly on a redundancy-free schema,
                 eliminating the need to maintain pre-built aggregate
                 tables during data entry transactions. The resulting
                 reduction in transaction complexity leads to a dramatic
                 simplification of data models and applications,
                 redefining the way we build enterprise systems. First
                 analyses of productive applications adopting this
                 concept confirm that system architectures enabled by
                 in-memory column stores are conceptually superior for
                 business transaction processing compared to row-based
                 approaches. Additionally, our analyses show a shift of
                 enterprise workloads to even more read-oriented
                 processing due to the elimination of updates of
                 transaction-maintained aggregates.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Markl:2014:BCD,
  author =       "Volker Markl",
  title =        "Breaking the chains: on declarative data analysis and
                 data independence in the big data era",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1730--1733",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data management research, systems, and technologies
                 have drastically improved the availability of data
                 analysis capabilities, particularly for non-experts,
                 due in part to low-entry barriers and reduced ownership
                 costs (e.g., for data management infrastructures and
                 applications). Major reasons for the widespread success
                 of database systems and today's multi-billion dollar
                 data management market include data independence,
                 separating physical representation and storage from the
                 actual information, and declarative languages,
                 separating the program specification from its intended
                 execution environment. In contrast, today's big data
                 solutions do not offer data independence and
                 declarative specification. As a result, big data
                 technologies are mostly employed in newly-established
                 companies with IT-savvy employees or in large
                 well-established companies with big IT departments. We
                 argue that current big data solutions will continue to
                 fall short of widespread adoption, due to usability
                 problems, despite the fact that in-situ data analytics
                 technologies achieve a good degree of schema
                 independence. In particular, we consider the lack of a
                 declarative specification to be a major road-block,
                 contributing to the scarcity in available data
                 scientists available and limiting the application of
                 big data to the IT-savvy industries. In particular,
                 data scientists currently have to spend a lot of time
                 on tuning their data analysis programs for specific
                 data characteristics and a specific execution
                 environment. We believe that the research community
                 needs to bring the powerful concepts of declarative
                 specification to current data analysis systems, in
                 order to achieve the broad big data technology adoption
                 and effectively deliver the promise that novel big data
                 technologies offer.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Neumann:2014:EHP,
  author =       "Thomas Neumann",
  title =        "Engineering high-performance database engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1734--1741",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Developing a database engine is both challenging and
                 rewarding. Database engines are very complex software
                 artifacts that have to scale to large data sizes and
                 large hardware configurations, and developing such
                 systems usually means choosing between different
                 trade-offs at various points of development. This
                 papers gives a survey over two different database
                 engines, the disk-based SPARQL-processing engine
                 RDF-3X, and the relational main-memory engine HyPer. It
                 discusses the design choices that were made during
                 development, and highlights optimization techniques
                 that are important for both systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2014:RLC,
  author =       "Wei Cao and Feng Yu and Jiasen Xie",
  title =        "Realization of the low cost and high performance
                 {MySQL} cloud database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1742--1747",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MySQL is a low cost, high performance, good
                 reliability and open source database product, widely
                 used in many Internet companies. For example, there are
                 thousands of MySQL servers being used in Taobao.
                 Although NoSQL developed very quickly in past two
                 years, and new products emerged in endlessly, but in
                 the actual business application of NoSQL, the
                 requirements to developers are relatively high.
                 Moreover, MySQL has many more mature middleware,
                 maintenance tools and a benign ecological circle, so
                 from this perspective, MySQL dominates in the whole
                 situation, while NoSQL is as a supplement. We (the core
                 system database team of Taobao) have done a lot of work
                 in the field of MySQL hosting platform, designed and
                 implemented a UMP (Unified MySQL Platform) system, to
                 provide a low cost and high performance MySQL cloud
                 database service.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qin:2014:FCS,
  author =       "An Qin and Dianming Hu and Jun Liu and Wenjun Yang and
                 Dai Tan",
  title =        "{Fatman}: cost-saving and reliable archival storage
                 based on volunteer resources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1748--1753",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present Fatman, an enterprise-scale archival
                 storage based on volunteer contribution resources from
                 underutilized web servers, usually deployed on
                 thousands of nodes with spare storage capacity. Fatman
                 is specifically designed for enhancing the utilization
                 of existing storage resources and cutting down the
                 hardware purchase cost. Two major concerned issues of
                 the system design are maximizing the resource
                 utilization of volunteer nodes without violating
                 Service Level Objectives (SLOs) and minimizing the cost
                 without reducing the availability of archival system.
                 Fatman has been widely deployed on tens of thousands of
                 server nodes across several datacenters, provided more
                 than 100PB storage capacity and served dozens of
                 internal mass-data applications. The system realizes an
                 efficient storage quota consolidation by strong
                 isolation and budget limitation, to maximally support
                 resources contribution without any degradation on
                 host-level SLOs. It firstly improves data reliability
                 by applying disk failure prediction to diminish failure
                 recovery cost, named fault-aware data management,
                 dramatically reduces the MTTR by 76.3\% and decreases
                 file crash ratio by 35\% on real-life product
                 workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:DIR,
  author =       "Shiming Zhang and Yin Yang and Wei Fan and Marianne
                 Winslett",
  title =        "Design and implementation of a real-time interactive
                 analytics system for large spatio-temporal data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1754--1759",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In real-time interactive data analytics, the user
                 expects to receive the results of each query within a
                 short time period such as seconds. This is especially
                 challenging when the data is big (e.g., on the scale of
                 petabytes), and the analytics system runs on top of
                 cloud infrastructure (e.g., thousands of interconnected
                 commodity servers). We have been building such a
                 system, called OceanRT, for managing large
                 spatio-temporal data such as call logs and mobile web
                 browsing records collected by a telecommunication
                 company. Although there already exist systems for
                 querying big data in real time, OceanRT's performance
                 stands out due to several novel designs and components
                 that address key efficiency and scalability issues that
                 were largely overlooked in existing systems. First,
                 OceanRT makes extensive use of software RDMA one-sided
                 operations, which reduce networking costs without
                 requiring specialized hardware. Second, OceanRT
                 exploits the parallel computing capabilities of each
                 node in the cloud through a novel architecture
                 consisting of Access-Query Engines (AQEs) connected
                 with minimal overhead. Third, OceanRT contains a novel
                 storage scheme that optimizes for queries with joins
                 and multi-dimensional selections, which are common for
                 large spatio-temporal data. Experiments using the
                 TPC-DS benchmark show that OceanRT is usually more than
                 an order of magnitude faster than the current
                 state-of-the-art systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dai:2014:PRS,
  author =       "Chaoyue Dai and Feng Qian and Wei Jiang and Zhoutian
                 Wang and Zenghong Wu",
  title =        "A personalized recommendation system for {NetEase}
                 dating site",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1760--1765",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the popularity of internet, more and more people
                 try to find friends or dating partners on online dating
                 web sites. Recommending appropriate partners from a
                 large amount of candidates becomes an interesting and
                 challenging problem in the field of recommendation
                 system. Various types of recommendation techniques
                 (e.g., content based recommendation, collaborative
                  filtering and association rule mining) have been proposed
                 to tackle this problem. However most of them ignore the
                 personalization concerns that they (1) mainly consider
                 the hot users or frequent items, (2) cover only a
                 portion of users especially ignoring the long tails,
                 (3) and cannot deal with the cold start problem
                 properly. In this paper, we present a regression based
                 hybrid recommendation system that makes use of matching
                 degree, fancy degree, activity, sincerity, popularity
                 and enthusiasm, to recommend appropriate partners. The
                 experimental evaluation of our recommendation system on
                 a real dating web site shows our strategy is more
                 effective and efficient than its previous version which
                 follows the principle of giving higher priority to the
                 recent active users.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ling:2014:GIH,
  author =       "Zheng Jye Ling and Quoc Trung Tran and Ju Fan and
                 Gerald C. H. Koh and Thi Nguyen and Chuen Seng Tan and
                 James W. L. Yip and Meihui Zhang",
  title =        "{GEMINI}: an integrative healthcare analytics system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1766--1771",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Healthcare systems around the world are facing the
                 challenge of information overload in caring for
                 patients in an affordable, safe and high-quality manner
                 in a system with limited healthcare resources and
                 increasing costs. To alleviate this problem, we develop
                 an integrative healthcare analytics system called
                 GEMINI which allows point of care analytics for doctors
                 where real-time usable and relevant information of
                 their patients are required through the questions they
                 asked about the patients they are caring for. GEMINI
                 extracts data of each patient from various data sources
                 and stores them as information in a patient profile
                 graph. The data sources are complex and varied
                 consisting of both structured data (such as, patients'
                 demographic data, laboratory results and medications)
                 and unstructured data (such as, doctors' notes). Hence,
                  the patient profile graph provides holistic and
                 comprehensive information of patients' healthcare
                 profile, from which GEMINI can infer implicit
                 information useful for administrative and clinical
                 purposes, and extract relevant information for
                 performing predictive analytics. At the core, GEMINI
                 keeps interacting with the healthcare professionals as
                 part of a feedback loop to gather, infer, ascertain and
                 enhance the self-learning knowledge base. We present a
                 case study on using GEMINI to predict the risk of
                 unplanned patient readmissions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zou:2014:MTD,
  author =       "Yongqiang Zou and Xing Jin and Yi Li and Zhimao Guo
                 and Eryu Wang and Bin Xiao",
  title =        "{Mariana}: {Tencent} deep learning platform and its
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1772--1777",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Deep learning gains lots of attention in recent years
                 and is more and more important for mining values in big
                 data. However, to make deep learning practical for a
                 wide range of applications in Tencent Inc., three
                 requirements must be considered: (1) Lots of
                 computational power are required to train a practical
                 model with tens of millions of parameters and billions
                 of samples for products such as automatic speech
                 recognition (ASR), and the number of parameters and
                 training data is still growing. (2) The capability of
                 training larger model is necessary for better model
                 quality. (3) Easy to use frameworks are valuable to do
                 many experiments to perform model selection, such as
                 finding an appropriate optimization algorithm and
                 tuning optimal hyper-parameters. To accelerate
                 training, support large models, and make experiments
                 easier, we built Mariana, the Tencent deep learning
                 platform, which utilizes GPU and CPU cluster to train
                 models parallelly with three frameworks: (1) a
                 multi-GPU data parallelism framework for deep neural
                 networks (DNNs). (2) a multi-GPU model parallelism and
                 data parallelism framework for deep convolutional
                 neural networks (CNNs). (3) a CPU cluster framework for
                 large scale DNNs. Mariana also provides built-in
                 algorithms and features to facilitate experiments.
                 Mariana is in production usage for more than one year,
                 achieves state-of-the-art acceleration performance, and
                 plays a key role in training models and improving
                 quality for automatic speech recognition and image
                 recognition in Tencent WeChat, a mobile social
                 platform, and for Ad click-through rate prediction
                 (pCTR) in Tencent QQ, an instant messaging platform,
                 and Tencent Qzone, a social networking service.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2014:YPC,
  author =       "Sai Wu and Chun Chen and Gang Chen and Ke Chen and
                 Lidan Shou and Hui Cao and He Bai",
  title =        "{YZStack}: provisioning customizable solution for big
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1778--1783",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "YZStack is our developing solution which implements
                 many well-established big data techniques as selectable
                 modules and allows users to customize their systems as
                 a process of module selection. In particular, it
                 includes an openstack based IaaS (Infrastructure as a
                 Service) layer, a distributed file system based DaaS
                 (Data as a Service) layer, a PaaS (Platform as a
                 Service) layer equipped with parallel processing
                 techniques and a SaaS (Software as a Service) layer
                 with popular data analytic algorithms. Layers of
                 YZStack are loosely connected, so that customization of
                 one layer does not affect the other layers and their
                 interactions. In this paper, we use a smart financial
                 system developed for the Zhejiang Provincial Department
                 of Finance to demonstrate how to leverage YZStack to
                 speed up the implementation of big data system. We also
                 introduce two popular applications of the financial
                 system, economic prediction and detection of improper
                 payment.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Klonatos:2014:EBE,
  author =       "Yannis Klonatos and Christoph Koch and Tiark Rompf and
                 Hassan Chafi",
  title =        "Errata for {``Building efficient query engines in a
                 high-level language'': PVLDB {\bf 7}(10):853--864}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "13",
  pages =        "1784--1784",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See \cite{Klonatos:2014:BEQ}.",
  abstract =     "This is in response to recent feedback from our peers
                 that calls for a number of clarifications regarding the
                 experimental section of our paper.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2014:SMM,
  author =       "Wei Lu and Shanshan Chen and Keqian Li and Laks V. S.
                 Lakshmanan",
  title =        "Show me the money: dynamic recommendations for revenue
                 maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1785--1796",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recommender Systems (RS) play a vital role in
                 applications such as e-commerce and on-demand content
                 streaming. Research on RS has mainly focused on the
                 customer perspective, i.e., accurate prediction of user
                 preferences and maximization of user utilities. As a
                 result, most existing techniques are not explicitly
                 built for revenue maximization, the primary business
                 goal of enterprises. In this work, we explore and
                 exploit a novel connection between RS and the
                 profitability of a business. As recommendations can be
                 seen as an information channel between a business and
                 its customers, it is interesting and important to
                 investigate how to make strategic dynamic
                 recommendations leading to maximum possible revenue. To
                 this end, we propose a novel revenue model that takes
                 into account a variety of factors including prices,
                 valuations, saturation effects, and competition amongst
                 products. Under this model, we study the problem of
                 finding revenue-maximizing recommendation strategies
                 over a finite time horizon. We show that this problem
                 is NP-hard, but approximation guarantees can be
                 obtained for a slightly relaxed version, by
                 establishing an elegant connection to matroid theory.
                 Given the prohibitively high complexity of the
                 approximation algorithm, we also design intelligent
                 heuristics for the original problem. Finally, we
                 conduct extensive experiments on two real and synthetic
                 datasets and demonstrate the efficiency, scalability,
                  and effectiveness of our algorithms, and that they
                 significantly outperform several intuitive baselines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2014:SSG,
  author =       "Peng Lu and Gang Chen and Beng Chin Ooi and Hoang Tam
                 Vo and Sai Wu",
  title =        "{ScalaGiST}: scalable generalized search trees for
                 {MapReduce} systems [innovative systems paper]",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1797--1808",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MapReduce has become the state-of-the-art for data
                 parallel processing. Nevertheless, Hadoop, an
                 open-source equivalent of MapReduce, has been noted to
                 have sub-optimal performance in the database context
                 since it is initially designed to operate on raw data
                 without utilizing any type of indexes. To alleviate the
                 problem, we present ScalaGiST --- scalable generalized
                 search tree that can be seamlessly integrated with
                 Hadoop, together with a cost-based data access
                 optimizer for efficient query processing at run-time.
                 ScalaGiST provides extensibility in terms of data and
                 query types, hence is able to support unconventional
                 queries (e.g., multi-dimensional range and $k$-NN
                 queries) in MapReduce systems, and can be dynamically
                 deployed in large cluster environments for handling big
                 users and data. We have built ScalaGiST and
                 demonstrated that it can be easily instantiated to
                 common B$^+$ -tree and R-tree indexes yet for dynamic
                 distributed environments. Our extensive performance
                 study shows that ScalaGiST can provide efficient write
                 and read performance, elastic scaling property, as well
                 as effective support for MapReduce execution of ad-hoc
                 analytic queries. Performance comparisons with recent
                 proposals of specialized distributed index structures,
                 such as SpatialHadoop, Data Mapping, and RT-CAN further
                 confirm its efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2014:FPK,
  author =       "Mohan Yang and Bolin Ding and Surajit Chaudhuri and
                 Kaushik Chakrabarti",
  title =        "Finding patterns in a knowledge base using keywords to
                 compose table answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1809--1820",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We aim to provide table answers to keyword queries
                 using a knowledge base. For queries referring to
                 multiple entities, like ``Washington cities
                 population'' and ``Mel Gibson movies'', it is better to
                 represent each relevant answer as a table which
                 aggregates a set of entities or joins of entities
                 within the same table scheme or pattern. In this paper,
                 we study how to find highly relevant patterns in a
                 knowledge base for user-given keyword queries to
                 compose table answers. A knowledge base is modeled as a
                 directed graph called knowledge graph, where nodes
                 represent its entities and edges represent the
                 relationships among them. Each node/edge is labeled
                 with type and text. A pattern is an aggregation of
                 subtrees which contain all keywords in the texts and
                 have the same structure and types on node/edges. We
                 propose efficient algorithms to find patterns that are
                 relevant to the query for a class of scoring functions.
                 We show the hardness of the problem in theory, and
                 propose path-based indexes that are affordable in
                 memory. Two query-processing algorithms are proposed:
                 one is fast in practice for small queries (with small
                 numbers of patterns as answers) by utilizing the
                 indexes; and the other one is better in theory, with
                 running time linear in the sizes of indexes and
                 answers, which can handle large queries better. We also
                 conduct extensive experimental study to compare our
                 approaches with a naive adaption of known techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yan:2014:PAG,
  author =       "Da Yan and James Cheng and Kai Xing and Yi Lu and
                 Wilfred Ng and Yingyi Bu",
  title =        "{Pregel} algorithms for graph connectivity problems
                 with performance guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1821--1832",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs in real life applications are often huge, such
                 as the Web graph and various social networks. These
                 massive graphs are often stored and processed in
                 distributed sites. In this paper, we study graph
                 algorithms that adopt Google's Pregel, an iterative
                 vertex-centric framework for graph processing in the
                 Cloud. We first identify a set of desirable properties
                 of an efficient Pregel algorithm, such as linear space,
                 communication and computation cost per iteration, and
                 logarithmic number of iterations. We define such an
                 algorithm as a practical Pregel algorithm (PPA). We
                 then propose PPAs for computing connected components
                 (CCs), biconnected components (BCCs) and strongly
                 connected components (SCCs). The PPAs for computing
                 BCCs and SCCs use the PPAs of many fundamental graph
                 problems as building blocks, which are of interest by
                 themselves. Extensive experiments over large real
                 graphs verified the efficiency of our algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shang:2014:AAG,
  author =       "Zechao Shang and Jeffrey Xu Yu",
  title =        "Auto-approximation of graph computing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1833--1844",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the big data era, graph computing is one of the
                 challenging issues because there are numerous large
                 graph datasets emerging from real applications. A
                 question is: do we need to know the final exact answer
                 for a large graph? When it is impossible to know the
                 exact answer in a limited time, is it possible to
                 approximate the final answer in an automatic and
                  systematic way without having to design new
                 approximate algorithms? The main idea behind the
                 question is: it is more important to find out something
                  meaningful quickly from a large graph, and we should
                 focus on finding a way of making use of large graphs
                 instead of spending time on designing approximate
                 algorithms. In this paper, we give an innovative
                 approach which automatically and systematically
                 synthesizes a program to approximate the original
                 program. We show that we can give users some answers
                 with reasonable accuracy and high efficiency for a wide
                 spectrum of graph algorithms, without having to know
                 the details of graph algorithms. We have conducted
                 extensive experimental studies using many graph
                 algorithms that are supported in the existing graph
                 systems and large real graphs. Our extensive
                 experimental results reveal that our automatically
                 approximating approach is highly feasible.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Furche:2014:DTW,
  author =       "Tim Furche and Georg Gottlob and Giovanni Grasso and
                 Xiaonan Guo and Giorgio Orsi and Christian Schallhart
                 and Cheng Wang",
  title =        "{DIADEM}: thousands of websites to a single database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1845--1856",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The web is overflowing with implicitly structured
                 data, spread over hundreds of thousands of sites,
                 hidden deep behind search forms, or siloed in
                 marketplaces, only accessible as HTML. Automatic
                 extraction of structured data at the scale of thousands
                 of websites has long proven elusive, despite its
                 central role in the ``web of data''. Through an
                 extensive evaluation spanning over 10000 web sites from
                 multiple application domains, we show that automatic,
                 yet accurate full-site extraction is no longer a
                 distant dream. diadem is the first automatic full-site
                 extraction system that is able to extract structured
                 data from different domains at very high accuracy. It
                 combines automated exploration of websites,
                 identification of relevant data, and induction of
                 exhaustive wrappers. Automating these components is the
                 first challenge. diadem overcomes this challenge by
                 combining phenomenological and ontological knowledge.
                 Integrating these components is the second challenge.
                 diadem overcomes this challenge through a self-adaptive
                 network of relational transducers that produces
                 effective wrappers for a wide variety of websites. Our
                 extensive and publicly available evaluation shows that,
                 for more than 90\% of sites from three domains, diadem
                 obtains an effective wrapper that extracts all relevant
                 data with 97\% average precision. diadem also tolerates
                 noisy entity recognisers, and its components
                 individually outperform comparable approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2014:UAQ,
  author =       "Wentao Wu and Xi Wu and Hakan Hacig{\"u}m{\"u}s and
                 Jeffrey F. Naughton",
  title =        "Uncertainty aware query execution time prediction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1857--1868",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Predicting query execution time is a fundamental issue
                 underlying many database management tasks. Existing
                 predictors rely on information such as cardinality
                 estimates and system performance constants that are
                 difficult to know exactly. As a result, accurate
                 prediction still remains elusive for many queries.
                 However, existing predictors provide a single, point
                 estimate of the true execution time, but fail to
                 characterize the uncertainty in the prediction. In this
                 paper, we take a first step towards providing
                 uncertainty information along with query execution time
                 predictions. We use the query optimizer's cost model to
                 represent the query execution time as a function of the
                 selectivities of operators in the query plan as well as
                 the constants that describe the cost of CPU and I/O
                 operations in the system. By treating these quantities
                 as random variables rather than constants, we show that
                 with low overhead we can infer the distribution of
                 likely prediction errors. We further show that the
                 estimated prediction errors by our proposed techniques
                 are strongly correlated with the actual prediction
                 errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Konstantinidis:2014:OCS,
  author =       "George Konstantinidis and Jos{\'e} Luis Ambite",
  title =        "Optimizing the chase: scalable data integration under
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1869--1880",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We are interested in scalable data integration and
                 data exchange under constraints/dependencies. In data
                 exchange the problem is how to materialize a target
                 database instance, satisfying the source-to-target and
                 target dependencies, that provides the certain answers.
                 In data integration, the problem is how to rewrite a
                 query over the target schema into a query over the
                 source schemas that provides the certain answers. In
                 both these problems we make use of the chase algorithm,
                 the main tool to reason with dependencies. Our first
                 contribution is to introduce the frugal chase, which
                 produces smaller universal solutions than the standard
                 chase, still remaining polynomial in data complexity.
                 Our second contribution is to use the frugal chase to
                 scale up query answering using views under LAV weakly
                 acyclic target constraints, a useful language capturing
                 RDF/S. The latter problem can be reduced to query
                 rewriting using views without constraints by chasing
                 the source-to-target mappings with the target
                 constraints. We construct a compact graph-based
                 representation of the mappings and the constraints and
                 develop an efficient algorithm to run the frugal chase
                 on this representation. We show experimentally that our
                 approach scales to large problems, speeding up the
                 compilation of the dependencies into the mappings by
                 close to 2 and 3 orders of magnitude, compared to the
                 standard and the core chase, respectively. Compared to
                 the standard chase, we improve online query rewriting
                 time by a factor of 3, while producing equivalent, but
                 smaller, rewritings of the original query.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Athanassoulis:2014:BTA,
  author =       "Manos Athanassoulis and Anastasia Ailamaki",
  title =        "{BF}-tree: approximate tree indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1881--1892",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The increasing volume of time-based generated data and
                 the shift in storage technologies suggest that we might
                 need to reconsider indexing. Several workloads --- like
                 social and service monitoring --- often include
                 attributes with implicit clustering because of their
                 time-dependent nature. In addition, solid state disks
                 (SSD) (using flash or other low-level technologies)
                 emerge as viable competitors of hard disk drives (HDD).
                 Capacity and access times of storage devices create a
                 trade-off between SSD and HDD. Slow random accesses in
                 HDD have been replaced by efficient random accesses in
                 SSD, but their available capacity is one or more orders
                 of magnitude more expensive than the one of HDD.
                 Indexing, however, is designed assuming HDD as
                 secondary storage, thus minimizing random accesses at
                 the expense of capacity. Indexing data using SSD as
                 secondary storage requires treating capacity as a
                 scarce resource. To this end, we introduce approximate
                 tree indexing, which employs probabilistic data
                 structures (Bloom filters) to trade accuracy for size
                 and produce smaller, yet powerful, tree indexes, which
                 we name Bloom filter trees (BF-Trees). BF-Trees exploit
                 pre-existing data ordering or partitioning to offer
                 competitive search performance. We demonstrate, both by
                 an analytical study and by experimental results, that
                 by using workload knowledge and reducing indexing
                 accuracy up to some extent, we can save substantially
                 on capacity when indexing on ordered or partitioned
                 attributes. In particular, in experiments with a
                 synthetic workload, approximate indexing offers
                 2.22x-48x smaller index footprint with competitive
                 response times, and in experiments with TPCH and a
                 monitoring real-life dataset from an energy company, it
                 offers 1.6x-4x smaller index footprint with competitive
                 search times as well.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tozun:2014:AAI,
  author =       "P{\i}nar T{\"o}z{\"u}n and Islam Atta and Anastasia
                 Ailamaki and Andreas Moshovos",
  title =        "{ADDICT}: advanced instruction chasing for
                 transactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1893--1904",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent studies highlight that traditional transaction
                 processing systems utilize the micro-architectural
                 features of modern processors very poorly. L1
                 instruction cache and long-latency data misses dominate
                 execution time. As a result, more than half of the
                 execution cycles are wasted on memory stalls. Previous
                 works on reducing stall time aim at improving locality
                 through either hardware or software techniques.
                 However, exploiting hardware resources based on the
                 hints given by the software-side has not been widely
                 studied for data management systems. In this paper, we
                 observe that, independently of their high-level
                 functionality, transactions running in parallel on a
                 multicore system execute actions chosen from a limited
                 sub-set of predefined database operations. Therefore,
                 we initially perform a memory characterization study of
                 modern transaction processing systems using
                 standardized benchmarks. The analysis demonstrates that
                 same-type transactions exhibit at most 6\% overlap in
                 their data footprints whereas there is up to 98\%
                 overlap in instructions. Based on the findings, we
                 design ADDICT, a transaction scheduling mechanism that
                 aims at maximizing the instruction cache locality.
                 ADDICT determines the most frequent actions of database
                 operations, whose instruction footprint can fit in an
                 L1 instruction cache, and assigns a core to execute
                 each of these actions. Then, it schedules each action
                 on its corresponding core. Our prototype implementation
                 of ADDICT reduces L1 instruction misses by 85\% and the
                 long latency data misses by 20\%. As a result, ADDICT
                 leads up to a 50\% reduction in the total execution
                 time for the evaluated workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alsubaiee:2014:ASO,
  author =       "Sattam Alsubaiee and Yasser Altowim and Hotham
                 Altwaijry and Alexander Behm and Vinayak Borkar and
                 Yingyi Bu and Michael Carey and Inci Cetindil and
                 Madhusudan Cheelangi and Khurram Faraaz and Eugenia
                 Gabrielova and Raman Grover and Zachary Heilbron and
                 Young-Seok Kim and Chen Li and Guangqiang Li and Ji
                 Mahn Ok and Nicola Onose and Pouria Pirzadeh and
                 Vassilis Tsotras and Rares Vernica and Jian Wen and
                 Till Westmann",
  title =        "{AsterixDB}: a scalable, open source {BDMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1905--1916",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "AsterixDB is a new, full-function BDMS (Big Data
                 Management System) with a feature set that
                 distinguishes it from other platforms in today's open
                 source Big Data ecosystem. Its features make it
                 well-suited to applications like web data warehousing,
                 social data storage and analysis, and other use cases
                 related to Big Data. AsterixDB has a flexible NoSQL
                 style data model; a query language that supports a wide
                 range of queries; a scalable runtime; partitioned,
                 LSM-based data storage and indexing (including
                 B$^+$-tree, R-tree, and text indexes); support for
                 external as well as natively stored data; a rich set of
                 built-in types; support for fuzzy, spatial, and
                 temporal types and queries; a built-in notion of data
                 feeds for ingestion of data; and transaction support
                 akin to that of a NoSQL store. Development of AsterixDB
                 began in 2009 and led to a mid-2013 initial open source
                 release. This paper is the first complete description
                 of the resulting open source AsterixDB system. Covered
                 herein are the system's data model, its query language,
                 and its software architecture. Also included are a
                 summary of the current status of the project and a
                 first glimpse into how AsterixDB performs when compared
                 to alternative technologies, including a parallel
                 relational DBMS, a popular NoSQL store, and a popular
                 Hadoop-based SQL data analytics platform, for things
                 that both technologies can do. Also included is a brief
                 description of some initial trials that the system has
                 undergone and the lessons learned (and plans laid)
                 based on those early ``customer'' engagements.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2014:LLB,
  author =       "Ning Xu and Lei Chen and Bin Cui",
  title =        "{LogGP}: a log-based dynamic graph partitioning
                 method",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1917--1928",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increasing availability and scale of graph
                 data from Web 2.0, graph partitioning becomes one of
                 efficient preprocessing techniques to balance the
                 computing workload. Since the cost of partitioning the
                 entire graph is strictly prohibitive, there are some
                 recent tentative works towards streaming graph
                 partitioning which can run faster, be easily
                 paralleled, and be incrementally updated.
                 Unfortunately, the experiments show that the running
                 time of each partitioning is still unbalanced due to
                 the variation of workload access patterns during the
                 supersteps. In addition, the one-pass streaming
                 partitioning result is not always satisfactory for the
                 algorithms' local view of the graph. In this paper, we
                 present LogGP, a log-based graph partitioning system
                 that records, analyzes and reuses the historical
                 statistical information to refine the partitioning
                 result. LogGP can be used as a middle-ware and deployed
                 to many state-of-the-art paralleled graph processing
                 systems easily. LogGP utilizes the historical
                 partitioning results to generate a hyper-graph and uses
                 a novel hyper-graph streaming partitioning approach to
                 generate a better initial streaming graph partitioning
                 result. During the execution, the system uses running
                 logs to optimize graph partitioning which prevents
                 performance degradation. Moreover, LogGP can
                 dynamically repartition the massive graphs in
                 accordance with the structural changes. Extensive
                 experiments conducted on a moderate size of computing
                 cluster with real-world graph datasets demonstrate the
                 superiority of our approach against the
                 state-of-the-art solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papadakis:2014:SMB,
  author =       "George Papadakis and George Papastefanatos and Georgia
                 Koutrika",
  title =        "Supervised meta-blocking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1929--1940",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity Resolution matches mentions of the same entity.
                 Being an expensive task for large data, its performance
                 can be improved by blocking, i.e., grouping similar
                 entities and comparing only entities in the same group.
                 Blocking improves the run-time of Entity Resolution,
                 but it still involves unnecessary comparisons that
                 limit its performance. Meta-blocking is the process of
                 restructuring a block collection in order to prune such
                 comparisons. Existing unsupervised meta-blocking
                 methods use simple pruning rules, which offer a rather
                 coarse-grained filtering technique that can be
                 conservative (i.e., keeping too many unnecessary
                 comparisons) or aggressive (i.e., pruning good
                 comparisons). In this work, we introduce supervised
                 meta-blocking techniques that learn classification
                 models for distinguishing promising comparisons. For
                 this task, we propose a small set of generic features
                 that combine a low extraction cost with high
                 discriminatory power. We show that supervised
                 meta-blocking can achieve high performance with small
                 training sets that can be manually created. We
                 analytically compare our supervised approaches with
                 baseline and competitor methods over 10 large-scale
                 datasets, both real and synthetic.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xie:2014:GTK,
  author =       "Min Xie and Laks V. S. Lakshmanan and Peter T. Wood",
  title =        "Generating top-$k$ packages via preference
                 elicitation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1941--1952",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There are several applications, such as play lists of
                 songs or movies, and shopping carts, where users are
                 interested in finding top-$k$ packages, consisting of
                 sets of items. In response to this need, there has been
                 a recent flurry of activity around extending classical
                 recommender systems (RS), which are effective at
                 recommending individual items, to recommend packages,
                 or sets of items. The few recent proposals for package
                 RS suffer from one of the following drawbacks: they
                 either rely on hard constraints which may be difficult
                 to be specified exactly by the user or on returning
                 Pareto-optimal packages which are too numerous for the
                 user to sift through. To overcome these limitations, we
                 propose an alternative approach for finding
                 personalized top-$k$ packages for users, by capturing
                 users' preferences over packages using a linear utility
                 function which the system learns. Instead of asking a
                 user to specify this function explicitly, which is
                 unrealistic, we explicitly model the uncertainty in the
                 utility function and propose a preference
                 elicitation-based framework for learning the utility
                 function through feedback provided by the user. We
                 propose several sampling-based methods which, given
                 user feedback, can capture the updated utility
                 function. We develop an efficient algorithm for
                 generating top-$k$ packages using the learned utility
                 function, where the rank ordering respects any of a
                 variety of ranking semantics proposed in the
                 literature. Through extensive experiments on both real
                 and synthetic datasets, we demonstrate the efficiency
                 and effectiveness of the proposed system for finding
                 top-$k$ packages.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:FRQ,
  author =       "Rui Li and Alex X. Liu and Ann L. Wang and Bezawada
                 Bruhadeshwar",
  title =        "Fast range query processing with strong privacy
                 protection for cloud computing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1953--1964",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Privacy has been the key road block to cloud computing
                 as clouds may not be fully trusted. This paper concerns
                 the problem of privacy preserving range query
                 processing on clouds. Prior schemes are weak in privacy
                 protection as they cannot achieve index
                 indistinguishability, and therefore allow the cloud to
                 statistically estimate the values of data and queries
                 using domain knowledge and history query results. In
                 this paper, we propose the first range query processing
                 scheme that achieves index indistinguishability under
                 the indistinguishability against chosen keyword attack
                 (IND-CKA). Our key idea is to organize indexing
                 elements in a complete binary tree called PBtree, which
                 satisfies structure indistinguishability (i.e., two
                 sets of data items have the same PBtree structure if
                 and only if the two sets have the same number of data
                 items) and node indistinguishability (i.e., the values
                 of PBtree nodes are completely random and have no
                 statistical meaning). We prove that our scheme is
                 secure under the widely adopted IND-CKA security model.
                 We propose two algorithms, namely PBtree traversal
                 width minimization and PBtree traversal depth
                 minimization, to improve query processing efficiency.
                 We prove that the worse case complexity of our query
                 processing algorithm using PBtree is $ O(| R | \log n)
                 $, where $n$ is the total number of data items and $R$
                 is the set of data items in the query result. We
                 implemented and evaluated our scheme on a real world
                 data set with 5 million items. For example, for a query
                 whose results contain ten data items, it takes only
                 0.17 milliseconds.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gao:2014:FTP,
  author =       "Yihan Gao and Aditya Parameswaran",
  title =        "Finish them!: pricing algorithms for human
                 computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1965--1976",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a batch of human computation tasks, a commonly
                 ignored aspect is how the price (i.e., the reward paid
                 to human workers) of these tasks must be set or varied
                 in order to meet latency or cost constraints. Often,
                 the price is set up-front and not modified, leading to
                 either a much higher monetary cost than needed (if the
                 price is set too high), or to a much larger latency
                 than expected (if the price is set too low). Leveraging
                 a pricing model from prior work, we develop algorithms
                 to optimally set and then vary price over time in order
                 to meet a (a) user-specified deadline while minimizing
                 total monetary cost (b) user-specified monetary budget
                 constraint while minimizing total elapsed time. We
                 leverage techniques from decision theory (specifically,
                 Markov Decision Processes) for both these problems, and
                 demonstrate that our techniques lead to up to 30\%
                 reduction in cost over schemes proposed in prior work.
                 Furthermore, we develop techniques to speed-up the
                 computation, enabling users to leverage the price
                 setting algorithms on-the-fly.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Catasta:2014:TTC,
  author =       "Michele Catasta and Alberto Tonon and Djellel Eddine
                 Difallah and Gianluca Demartini and Karl Aberer and
                 Philippe Cudr{\'e}-Mauroux",
  title =        "{TransactiveDB}: tapping into collective human
                 memories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1977--1980",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database Management Systems (DBMSs) have been rapidly
                 evolving in the recent years, exploring ways to store
                 multi-structured data or to involve human processes
                 during query execution. In this paper, we outline a
                 future avenue for DBMSs supporting transactive memory
                 queries that can only be answered by a collection of
                 individuals connected through a given interaction
                 graph. We present TransactiveDB and its ecosystem,
                 which allow users to pose queries in order to
                 reconstruct collective human memories. We describe a
                 set of new transactive operators including TUnion,
                 TFill, TJoin, and TProjection. We also describe how
                 TransactiveDB leverages transactive operators---by
                 mixing query execution, social network analysis and
                 human computation---in order to effectively and
                 efficiently tap into the memories of all targeted
                 users.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yan:2014:BBC,
  author =       "Da Yan and James Cheng and Yi Lu and Wilfred Ng",
  title =        "{Blogel}: a block-centric framework for distributed
                 computation on real-world graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1981--1992",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The rapid growth in the volume of many real-world
                 graphs (e.g., social networks, web graphs, and spatial
                 networks) has led to the development of various
                 vertex-centric distributed graph computing systems in
                 recent years. However, real-world graphs from different
                 domains have very different characteristics, which
                 often create bottlenecks in vertex-centric parallel
                 graph computation. We identify three such important
                 characteristics from a wide spectrum of real-world
                 graphs, namely (1) skewed degree distribution, (2)
                 large diameter, and (3) (relatively) high density.
                 Among them, only (1) has been studied by existing
                 systems, but many real-world power-law graphs also
                 exhibit the characteristics of (2) and (3). In this
                 paper, we propose a block-centric framework, called
                 Blogel, which naturally handles all the three adverse
                 graph characteristics. Blogel programmers may think
                 like a block and develop efficient algorithms for
                 various graph problems. We propose parallel algorithms
                 to partition an arbitrary graph into blocks
                 efficiently, and block-centric programs are then run
                 over these blocks. Our experiments on large real-world
                 graphs verified that Blogel is able to achieve orders
                 of magnitude performance improvements over the
                 state-of-the-art distributed graph computing systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liagouris:2014:EII,
  author =       "John Liagouris and Manolis Terrovitis",
  title =        "Efficient identification of implicit facts in
                 incomplete {OWL2-EL} knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "1993--2004",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Integrating incomplete and possibly inconsistent data
                 from various sources is a challenge that arises in
                 several application areas, especially in the management
                 of scientific data. A rising trend for data integration
                 is to model the data as axioms in the Web Ontology
                 Language (OWL) and use inference rules to identify new
                 facts. Although there are several approaches that
                 employ OWL for data integration, there is little work
                 on scalable algorithms able to handle large datasets
                 that do not fit in main memory. The main contribution
                 of this paper is an algorithm that allows the effective
                 use of OWL for integrating data in an environment with
                 limited memory. The core idea is to exhaustively apply
                 a set of complex inference rules on large disk-resident
                 datasets. To the best of our knowledge, this is the
                 first work that proposes an I/O-aware algorithm for
                 tackling with such an expressive subset of OWL like the
                 one we address here. Previous approaches considered
                 either simpler models (e.g. RDFS) or main-memory
                 algorithms. In the paper we detail the proposed
                 algorithm, prove its correctness, and experimentally
                 evaluate it on real and synthetic data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:WCA,
  author =       "Chen Jason Zhang and Yongxin Tong and Lei Chen",
  title =        "Where to: crowd-aided path selection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "2005--2016",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the widespread use of geo-positioning services
                 (GPS), GPS-based navigation systems have become ever
                 more of an integral part of our daily lives. GPS-based
                 navigation systems usually suggest multiple paths for
                 any given pair of source and target, leaving users
                 perplexed when trying to select the best one among
                 them, namely the problem of best path selection. Too
                 many suggested paths may jeopardize the usability of
                 the recommendation data, and decrease user
                 satisfaction. Although existing studies have already
                 partially relieved this problem through integrating
                 historical traffic logs or updating traffic conditions
                 periodically, their solutions neglect the potential
                 contribution of human experience. In this paper, we
                 resort to crowdsourcing to ease the pain of the best
                 path selection. The first step of appropriately using
                 the crowd is to ask proper questions. For the best path
                 selection problem, simple questions (e.g. binary
                 voting) over complete paths cannot be directly applied
                 to road networks due to their being too complex for
                 crowd workers. Thus, this paper makes the first
                 contribution by designing two types of questions,
                 namely Routing Query (RQ) and Binary Routing Query
                 (BRQ), to ask the crowd to decide which direction to
                 take at each road intersection. Furthermore, we propose
                 a series of efficient algorithms to dynamically manage
                 the questions in order to reduce the selection hardness
                 within a limited budget. Finally, we compare the
                 proposed methods against two baselines, and the
                 effectiveness and efficiency of our proposals are
                 verified by the results from simulations and
                 experiments on a real-world crowdsourcing platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2014:LSR,
  author =       "Yan Huang and Favyen Bastani and Ruoming Jin and
                 Xiaoyang Sean Wang",
  title =        "Large scale real-time ridesharing with service
                 guarantee on road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "7",
  number =       "14",
  pages =        "2017--2028",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 4 17:20:43 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Urban traffic gridlock is a familiar scene. At the
                 same time, the mean occupancy rate of personal vehicle
                 trips in the United States is only 1.6 persons per
                 vehicle mile. Ridesharing has the potential to solve
                 many environmental, congestion, pollution, and energy
                 problems. In this paper, we introduce the problem of
                 large scale real-time ridesharing with service
                 guarantee on road networks. Trip requests are
                 dynamically matched to vehicles while trip waiting and
                 service time constraints are satisfied. We first
                 propose two scheduling algorithms: a branch-and-bound
                 algorithm and an integer programming algorithm.
                 However, these algorithms do not adapt well to the
                 dynamic nature of the ridesharing problem. Thus, we
                 propose kinetic tree algorithms which are better suited
                 to efficient scheduling of dynamic requests and adjust
                 routes on-the-fly. We perform experiments on a large
                 Shanghai taxi dataset. Results show that the kinetic
                 tree algorithms outperform other algorithms
                 significantly.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2014:SSA,
  author =       "Yifang Sun and Wei Wang and Jianbin Qin and Ying Zhang
                 and Xuemin Lin",
  title =        "{SRS}: solving $c$-approximate nearest neighbor
                 queries in high dimensional {Euclidean} space with a
                 tiny index",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "1--12",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nearest neighbor searches in high-dimensional space
                 have many important applications in domains such as
                 data mining, and multimedia databases. The problem is
                 challenging due to the phenomenon called ``curse of
                 dimensionality''. An alternative solution is to
                 consider algorithms that return a $c$-approximate
                 nearest neighbor ($c$-ANN) with guaranteed
                 probabilities. Locality Sensitive Hashing (LSH) is
                 among the most widely adopted methods, and it achieves
                 high efficiency both in theory and practice. However,
                 it is known to require an extremely high amount of
                 space for indexing, hence limiting its scalability. In
                 this paper, we propose several surprisingly simple
                 methods to answer $c$-ANN queries with theoretical
                 guarantees requiring only a single tiny index. Our
                 methods are highly flexible and support a variety of
                 functionalities, such as finding the exact nearest
                 neighbor with any given probability. In the experiment,
                 our methods demonstrate superior performance against
                 the state-of-the-art LSH-based methods, and scale up
                 well to 1 billion high-dimensional points on a single
                 commodity PC.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dallachiesa:2014:TKN,
  author =       "Michele Dallachiesa and Themis Palpanas and Ihab F.
                 Ilyas",
  title =        "Top-$k$ nearest neighbor search in uncertain data
                 series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "13--24",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many real applications consume data that is
                 intrinsically uncertain, noisy and error-prone. In this
                 study, we investigate the problem of finding the
                 top-$k$ nearest neighbors in uncertain data series,
                 which occur in several different domains. We formalize
                 the top-$k$ nearest neighbor problem for uncertain data
                 series, and describe a model for uncertain data series
                 that captures both uncertainty and correlation. This
                 distinguishes our approach from prior work that
                 compromises the accuracy of the model by assuming
                 independence of the value distribution at neighboring
                 time-stamps. We introduce the Holistic-P$k$NN
                 algorithm, which uses novel metric bounds for uncertain
                 series and an efficient refinement strategy to reduce
                 the overall number of required probability estimates.
                 We evaluate our proposal under a variety of settings
                 using a combination of synthetic and 45 real datasets
                 from diverse domains. The results demonstrate the
                 significant advantages of the proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:RBP,
  author =       "Jiexing Li and Jeffrey Naughton and Rimma V. Nehme",
  title =        "Resource bricolage for parallel database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "25--36",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Running parallel database systems in an environment
                 with heterogeneous resources has become increasingly
                 common, due to cluster evolution and increasing
                 interest in moving applications into public clouds. For
                 database systems running in a heterogeneous cluster,
                 the default uniform data partitioning strategy may
                 overload some of the slow machines while at the same
                 time it may under-utilize the more powerful machines.
                 Since the processing time of a parallel query is
                 determined by the slowest machine, such an allocation
                 strategy may result in a significant query performance
                 degradation. We take a first step to address this
                 problem by introducing a technique we call resource
                 bricolage that improves database performance in
                 heterogeneous environments. Our approach quantifies the
                 performance differences among machines with various
                 resources as they process workloads with diverse
                 resource requirements. We formalize the problem of
                 minimizing workload execution time and view it as an
                 optimization problem, and then we employ linear
                 programming to obtain a recommended data partitioning
                 scheme. We verify the effectiveness of our technique
                 with an extensive experimental study on a commercial
                 database system.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Graefe:2014:MPB,
  author =       "Goetz Graefe and Haris Volos and Hideaki Kimura and
                 Harumi Kuno and Joseph Tucek and Mark Lillibridge and
                 Alistair Veitch",
  title =        "In-memory performance for big data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "37--48",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "When a working set fits into memory, the overhead
                 imposed by the buffer pool renders traditional
                 databases non-competitive with in-memory designs that
                 sacrifice the benefits of a buffer pool. However,
                 despite the large memory available with modern
                 hardware, data skew, shifting workloads, and complex
                 mixed workloads make it difficult to guarantee that a
                 working set will fit in memory. Hence, some recent work
                 has focused on enabling in-memory databases to protect
                 performance when the working data set almost fits in
                 memory. Contrary to those prior efforts, we enable
                 buffer pool designs to match in-memory performance
                 while supporting the ``big data'' workloads that
                 continue to require secondary storage, thus providing
                 the best of both worlds. We introduce here a novel
                 buffer pool design that adapts pointer swizzling for
                 references between system objects (as opposed to
                 application objects), and uses it to practically
                 eliminate buffer pool overheads for memory-resident
                 data. Our implementation and experimental evaluation
                 demonstrate that we achieve graceful performance
                 degradation when the working set grows to exceed the
                 buffer pool size, and graceful improvement when the
                 working set shrinks towards and below the memory and
                 buffer pool sizes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Long:2014:TSM,
  author =       "Cheng Long and Raymond Chi-Wing Wong and H. V.
                 Jagadish",
  title =        "Trajectory simplification: on minimizing the
                 direction-based error",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "49--60",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Trajectory data is central to many applications with
                 moving objects. Raw trajectory data is usually very
                 large, and so is simplified before it is stored and
                 processed. Many trajectory simplification notions have
                 been proposed, and among them, the direction-preserving
                 trajectory simplification (DPTS) which aims at
                 protecting the direction information has been shown to
                 perform quite well. However, existing studies on DPTS
                 require users to specify an error tolerance which users
                 might not know how to set properly in some cases (e.g.,
                 the error tolerance could only be known at some future
                 time and simply setting one error tolerance does not
                 meet the needs since the simplified trajectories would
                 usually be used in many different applications which
                 accept different error tolerances). In these cases, a
                 better solution is to minimize the error while
                 achieving a pre-defined simplification size. For this
                 purpose, in this paper, we define a problem called
                 Min-Error and develop two exact algorithms and one
                 2-factor approximate algorithm for the problem.
                 Extensive experiments on real datasets verified our
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{ElGebaly:2014:IIE,
  author =       "Kareem {El Gebaly} and Parag Agrawal and Lukasz Golab
                 and Flip Korn and Divesh Srivastava",
  title =        "Interpretable and informative explanations of
                 outcomes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "61--72",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we solve the following data
                 summarization problem: given a multi-dimensional data
                 set augmented with a binary attribute, how can we
                 construct an interpretable and informative summary of
                 the factors affecting the binary attribute in terms of
                 the combinations of values of the dimension attributes?
                 We refer to such summaries as explanation tables. We
                 show the hardness of constructing optimally-informative
                 explanation tables from data, and we propose effective
                 and efficient heuristics. The proposed heuristics are
                 based on sampling and include optimizations related to
                 computing the information content of a summary from a
                 sample of the data. Using real data sets, we
                 demonstrate the advantages of explanation tables
                 compared to related approaches that can be adapted to
                 solve our problem, and we show significant performance
                 benefits of our optimizations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:CIN,
  author =       "Fei Li and H. V. Jagadish",
  title =        "Constructing an interactive natural language interface
                 for relational databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "73--84",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Natural language has been the holy grail of query
                 interface designers, but has generally been considered
                 too hard to work with, except in limited specific
                 circumstances. In this paper, we describe the
                 architecture of an interactive natural language query
                 interface for relational databases. Through a carefully
                 limited interaction with the user, we are able to
                 correctly interpret complex natural language queries,
                 in a generic manner across a range of domains. By these
                 means, a logically complex English language sentence is
                 correctly translated into a SQL query, which may
                 include aggregation, nesting, and various types of
                 joins, among other things, and can be evaluated against
                 an RDBMS. We have constructed a system, NaLIR (Natural
                 Language Interface for Relational databases), embodying
                 these ideas. Our experimental assessment, through user
                 studies, demonstrates that NaLIR is good enough to be
                 usable in practice: even naive users are able to
                 specify quite complex ad-hoc queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2014:LGD,
  author =       "Yuanyuan Zhu and Jeffrey Xu Yu and Lu Qin",
  title =        "Leveraging graph dimensions in online graph search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "85--96",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs have been widely used due to its expressive
                 power to model complicated relationships. However,
                 given a graph database $ D_g = \{ g_1, g_2, \ldots, g_n
                 \} $, it is challenging to process graph queries since
                 a basic graph query usually involves costly graph
                 operations such as maximum common subgraph and graph
                 edit distance computation, which are NP-hard. In this
                 paper, we study a novel DS-preserved mapping which maps
                 graphs in a graph database $ D_g $ onto a
                 multidimensional space $ M_g $ under a structural
                 dimension $M$ using a mapping function $ \phi $ (). The
                 DS-preserved mapping preserves two things: distance and
                 structure. By the distance-preserving, it means that
                 any two graphs $ g_i$ and $ g_j$ in $ D_g$ must map to
                 two data objects $ \phi (g_i)$ and $ \phi (g_j)$ in $
                 M_g$, such that the distance, $ d(\phi (g_i), \phi
                 (g_j))$, between $ \phi (g_i)$ and $ \phi (g_j)$ in $
                 M_g$ approximates the graph dissimilarity $ \delta
                 (g_i, g_j)$ in $ D_g$. By the structure-preserving, it
                 further means that for a given unseen query graph $q$,
                 the distance between $q$ and any graph $ g_i$ in $ D_g$
                 needs to be preserved such that $ \delta (q, g_i)
                 \approx d(\phi (q), \phi (g_i))$. We discuss the
                 rationality of using graph dimension $M$ for online
                 graph processing, and show how to identify a small set
                 of subgraphs to form $M$ efficiently. We propose an
                 iterative algorithm DSPM to compute the graph
                 dimension, and discuss its optimization techniques. We
                 also give an approximate algorithm DSPMap in order to
                 handle a large graph database. We conduct extensive
                 performance studies on both real and synthetic datasets
                 to evaluate the top-$k$ similarity query which is to
                 find top-$k$ similar graphs from $ D_g$ for a query
                 graph, and show the effectiveness and efficiency of our
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sidlauskas:2014:SJM,
  author =       "Darius Sidlauskas and Christian S. Jensen",
  title =        "Spatial joins in main memory: implementation
                 matters!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "1",
  pages =        "97--100",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:33 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A recent PVLDB paper reports on experimental analyses
                 of ten spatial join techniques in main memory. We build
                 on this comprehensive study to raise awareness of the
                 fact that empirical running time performance findings
                 in main-memory settings are results of not only the
                 algorithms and data structures employed, but also their
                 implementation, which complicates the interpretation of
                 the results. In particular, we re-implement the worst
                 performing technique without changing the underlying
                 high-level algorithm, and we then offer evidence that
                 the resulting re-implementation is capable of
                 outperforming all the other techniques. This study
                 demonstrates that in main memory, where no
                 time-consuming I/O can mask variations in
                 implementation, implementation details are very
                 important; and it offers a concrete illustration of how
                 it is difficult to make conclusions from empirical
                 running time performance findings in main-memory
                 settings about data structures and algorithms
                 studied.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2014:SES,
  author =       "Xiaoyang Wang and Ying Zhang and Wenjie Zhang and
                 Xuemin Lin and Wei Wang",
  title =        "Selectivity estimation on streaming spatio-textual
                 data using local correlations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "2",
  pages =        "101--112",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we investigate the selectivity
                 estimation problem for streaming spatio-textual data,
                 which arises in many social network and geo-location
                 applications. Specifically, given a set of continuously
                 and rapidly arriving spatio-textual objects, each of
                 which is described by a geo-location and a short text,
                 we aim to accurately estimate the cardinality of a
                 spatial keyword query on objects seen so far, where a
                 spatial keyword query consists of a search region and a
                 set of query keywords. To the best of our knowledge,
                 this is the first work to address this important
                 problem. We first extend two existing techniques to
                 solve this problem, and show their limitations.
                 Inspired by two key observations on the ``locality'' of
                 the correlations among query keywords, we propose a
                 local correlation based method by utilizing an
                 augmented adaptive space partition tree ($ A^2 $SP-tree
                 for short) to approximately learn a local Bayesian
                 network on-the-fly for a given query and estimate its
                 selectivity. A novel local boosting approach is
                 presented to further enhance the learning accuracy of
                 local Bayesian networks. Our comprehensive experiments
                 on real-life datasets demonstrate the superior
                 performance of the local correlation based algorithm in
                 terms of estimation accuracy compared to other
                 competitors.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:PMK,
  author =       "Chuanwen Li and Yu Gu and Jianzhong Qi and Ge Yu and
                 Rui Zhang and Wang Yi",
  title =        "Processing moving $k$ {NN} queries using influential
                 neighbor sets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "2",
  pages =        "113--124",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The moving $k$ nearest neighbor query, which computes
                 one's $k$ nearest neighbor set and maintains it while
                 at move, is gaining importance due to the prevalent use
                 of smart mobile devices such as smart phones. Safe
                 region is a popular technique in processing the moving
                 $k$ nearest neighbor query. It is a region where the
                 movement of the query object does not cause the current
                 $k$ nearest neighbor set to change. Processing a moving
                 $k$ nearest neighbor query is a continuing process of
                 checking the validity of the safe region and
                 recomputing it if invalidated. The size of the safe
                 region largely decides the frequency of safe region
                 recomputation and hence query processing efficiency.
                 Existing moving $k$ nearest neighbor algorithms lack
                 efficiency due to either computing small safe regions
                 and have to recompute frequently or computing large
                 safe regions (i.e., an order-$k$ Voronoi cell) with a
                 high cost. In this paper, we take a third approach.
                 Instead of safe regions, we use a small set of safe
                 guarding objects. We prove that, as long as the
                 current $k$ nearest neighbors are closer to the query
                 object than the safe guarding objects, the current $k$
                 nearest neighbors stay valid and no recomputation is
                 required. This way, we avoid the high cost of safe
                 region recomputation. We also prove that, the region
                 defined by the safe guarding objects is the largest
                 possible safe region. This means that the recomputation
                 frequency of our method is also minimized. We conduct
                 extensive experiments comparing our method with the
                 state-of-the-art method on both real and synthetic data
                 sets. The results confirm the superiority of our
                 method.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mozafari:2014:SCS,
  author =       "Barzan Mozafari and Purna Sarkar and Michael Franklin
                 and Michael Jordan and Samuel Madden",
  title =        "Scaling up crowd-sourcing to very large datasets: a
                 case for active learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "2",
  pages =        "125--136",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Crowd-sourcing has become a popular means of acquiring
                 labeled data for many tasks where humans are more
                 accurate than computers, such as image tagging, entity
                 resolution, and sentiment analysis. However, due to the
                 time and cost of human labor, solutions that rely
                 solely on crowd-sourcing are often limited to small
                 datasets (i.e., a few thousand items). This paper
                 proposes algorithms for integrating machine learning
                 into crowd-sourced databases in order to combine the
                 accuracy of human labeling with the speed and
                 cost-effectiveness of machine learning classifiers. By
                 using active learning as our optimization strategy for
                 labeling tasks in crowd-sourced databases, we can
                 minimize the number of questions asked to the crowd,
                 allowing crowd-sourced applications to scale (i.e.,
                 label much larger datasets at lower costs). Designing
                 active learning algorithms for a crowd-sourced database
                 poses many practical challenges: such algorithms need
                 to be generic, scalable, and easy to use, even for
                 practitioners who are not machine learning experts. We
                 draw on the theory of nonparametric bootstrap to
                 design, to the best of our knowledge, the first active
                 learning algorithms that meet all these requirements.
                 Our results, on 3 real-world datasets collected with
                 Amazon's Mechanical Turk, and on 15 UCI datasets, show
                 that our methods on average ask 1--2 orders of
                 magnitude fewer questions than the baseline, and $
                 4.5$--$ 44 \times $ fewer than existing active learning
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2014:CCO,
  author =       "Dingyu Yang and Dongxiang Zhang and Kian-Lee Tan and
                 Jian Cao and Fr{\'e}d{\'e}ric {Le Mou{\"e}l}",
  title =        "{CANDS}: continuous optimal navigation via distributed
                 stream processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "2",
  pages =        "137--148",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Shortest path query over a dynamic road network is a
                 prominent problem for the optimization of real-time
                 traffic systems. Existing solutions rely either on a
                 centralized index system with tremendous
                 pre-computation overhead, or on a distributed graph
                 processing system such as Pregel that requires much
                 synchronization effort. However, the performance of
                 these systems degenerates with frequent route path
                 updates caused by continuous traffic condition change.
                 In this paper, we build CANDS, a distributed stream
                 processing platform for continuous optimal shortest
                 path queries. It provides an asynchronous solution to
                 answering a large quantity of shortest path queries. It
                 is able to efficiently detect affected paths and adjust
                 their paths in the face of traffic updates. Moreover,
                 the affected paths can be quickly updated to the
                 optimal solutions throughout the whole navigation
                 process. Experimental results demonstrate that the
                 performance for answering shortest path queries by
                 CANDS is two orders of magnitude better than that of
                 GPS, an open-source implementation of Pregel. In
                 addition, CANDS provides fast response to traffic
                 updates to guarantee the optimality of answering
                 shortest path queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Begum:2014:RTS,
  author =       "Nurjahan Begum and Eamonn Keogh",
  title =        "Rare time series motif discovery from unbounded
                 streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "2",
  pages =        "149--160",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The detection of time series motifs, which are
                 approximately repeated subsequences in time series
                 streams, has been shown to have great utility as a
                 subroutine in many higher-level data mining algorithms.
                 However, this detection becomes much harder in cases
                 where the motifs of interest are vanishingly rare or
                 when faced with a never-ending stream of data. In this
                 work we investigate algorithms to find such rare
                 motifs. We demonstrate that under reasonable
                 assumptions we must abandon any hope of an exact
                 solution to the motif problem as it is normally
                 defined; however, we introduce algorithms that allow us
                 to solve the underlying problem with high
                 probability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bu:2014:PBG,
  author =       "Yingyi Bu and Vinayak Borkar and Jianfeng Jia and
                 Michael J. Carey and Tyson Condie",
  title =        "Pregelix: {Big(ger)} graph analytics on a dataflow
                 engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "2",
  pages =        "161--172",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is a growing need for distributed graph
                 processing systems that are capable of gracefully
                 scaling to very large graph datasets. Unfortunately,
                 this challenge has not been easily met due to the
                 intense memory pressure imposed by process-centric,
                 message passing designs that many graph processing
                 systems follow. Pregelix is a new open source
                 distributed graph processing system that is based on an
                 iterative dataflow design that is better tuned to
                 handle both in-memory and out-of-core workloads. As
                 such, Pregelix offers improved performance
                 characteristics and scaling properties over current
                 open source systems (e.g., we have seen up to $ 15
                 \times $ speedup compared to Apache Giraph and up to $
                 35 \times $ speedup compared to distributed GraphLab),
                 and more effective use of available machine resources
                 to support Big(ger) Graph Analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sridharan:2014:PRC,
  author =       "Shriram Sridharan and Jignesh M. Patel",
  title =        "Profiling {R} on a contemporary processor",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "2",
  pages =        "173--184",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "R is a popular data analysis language, but there is
                 scant experimental data characterizing the run-time
                 profile of R programs. This paper addresses this
                 limitation by systematically cataloging where time is
                 spent when running R programs. Our evaluation using
                 four different workloads shows that when analyzing
                 large datasets, R programs (a) spend more than 85\% of
                 their time in processor stalls, which leads to slower
                 execution times, (b) trigger the garbage collector
                 frequently, which leads to higher memory stalls, and
                 (c) create a large number of unnecessary temporary
                 objects that causes R to swap to disk quickly even for
                 datasets that are far smaller than the available main
                 memory. Addressing these issues should allow R programs
                 to run faster than they do today, and allow R to be
                 used for analyzing even larger datasets. As outlined in
                 this paper, the results presented in this paper
                 motivate a number of future research investigations in
                 the database, architecture, and programming language
                 communities. All data and code that is used in this
                 paper (which includes the R programs, and changes to
                 the R source code for instrumentation) can be found at:
                 {\tt http://quickstep.cs.wisc.edu/dissecting-R/}.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bailis:2014:CAD,
  author =       "Peter Bailis and Alan Fekete and Michael J. Franklin
                 and Ali Ghodsi and Joseph M. Hellerstein and Ion
                 Stoica",
  title =        "Coordination avoidance in database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "185--196",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Minimizing coordination, or blocking communication
                 between concurrently executing operations, is key to
                 maximizing scalability, availability, and high
                 performance in database systems. However, uninhibited
                 coordination-free execution can compromise application
                 correctness, or consistency. When is coordination
                 necessary for correctness? The classic use of
                 serializable transactions is sufficient to maintain
                 correctness but is not necessary for all applications,
                 sacrificing potential scalability. In this paper, we
                 develop a formal framework, invariant confluence, that
                 determines whether an application requires coordination
                 for correct execution. By operating on
                 application-level invariants over database states
                 (e.g., integrity constraints), invariant confluence
                 analysis provides a necessary and sufficient condition
                 for safe, coordination-free execution. When programmers
                 specify their application invariants, this analysis
                 allows databases to coordinate only when anomalies that
                 might violate invariants are possible. We analyze the
                 invariant confluence of common invariants and
                 operations from real-world database systems (i.e.,
                 integrity constraints) and applications and show that
                 many are invariant confluent and therefore achievable
                 without coordination. We apply these results to a
                 proof-of-concept coordination-avoiding database
                 prototype and demonstrate sizable performance gains
                 compared to serializable execution, notably a 25-fold
                 improvement over prior TPC-C New-Order performance on a
                 200 server cluster.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zeng:2014:QSI,
  author =       "Qiang Zeng and Jignesh M. Patel and David Page",
  title =        "{QuickFOIL}: scalable inductive logic programming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "197--208",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Inductive Logic Programming (ILP) is a classic machine
                 learning technique that learns first-order rules from
                 relational-structured data. However, to-date most ILP
                 systems can only be applied to small datasets (tens of
                 thousands of examples). A long-standing challenge in
                 the field is to scale ILP methods to larger data sets.
                 This paper presents a method called QuickFOIL that
                 addresses this limitation. QuickFOIL employs a new
                 scoring function and a novel pruning strategy that
                 enables the algorithm to find high-quality rules.
                 QuickFOIL can also be implemented as an in-RDBMS
                 algorithm. Such an implementation presents a host of
                 query processing and optimization challenges that we
                 address in this paper. Our empirical evaluation shows
                 that QuickFOIL can scale to large datasets consisting
                  of hundreds of millions of tuples, and is often more
                  than an order of magnitude more efficient than other
                  existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yu:2014:SAE,
  author =       "Xiangyao Yu and George Bezerra and Andrew Pavlo and
                 Srinivas Devadas and Michael Stonebraker",
  title =        "Staring into the abyss: an evaluation of concurrency
                 control with one thousand cores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "209--220",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Computer architectures are moving towards an era
                 dominated by many-core machines with dozens or even
                 hundreds of cores on a single chip. This unprecedented
                 level of on-chip parallelism introduces a new dimension
                 to scalability that current database management systems
                 (DBMSs) were not designed for. In particular, as the
                 number of cores increases, the problem of concurrency
                 control becomes extremely challenging. With hundreds of
                 threads running in parallel, the complexity of
                 coordinating competing accesses to data will likely
                 diminish the gains from increased core counts. To
                 better understand just how unprepared current DBMSs are
                 for future CPU architectures, we performed an
                 evaluation of concurrency control for on-line
                 transaction processing (OLTP) workloads on many-core
                 chips. We implemented seven concurrency control
                 algorithms on a main-memory DBMS and using computer
                 simulations scaled our system to 1024 cores. Our
                 analysis shows that all algorithms fail to scale to
                 this magnitude but for different reasons. In each case,
                 we identify fundamental bottlenecks that are
                 independent of the particular database implementation
                 and argue that even state-of-the-art DBMSs suffer from
                 these limitations. We conclude that rather than
                 pursuing incremental solutions, many-core chips may
                 require a completely redesigned DBMS architecture that
                 is built from ground up and is tightly coupled with the
                 hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Trummer:2014:MOP,
  author =       "Immanuel Trummer and Christoph Koch",
  title =        "Multi-objective parametric query optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "221--232",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Classical query optimization compares query plans
                 according to one cost metric and associates each plan
                 with a constant cost value. In this paper, we introduce
                 the Multi-Objective Parametric Query Optimization (MPQ)
                 problem where query plans are compared according to
                 multiple cost metrics and the cost of a given plan
                 according to a given metric is modeled as a function
                 that depends on multiple parameters. The cost metrics
                 may for instance include execution time or monetary
                 fees; a parameter may represent the selectivity of a
                 query predicate that is unspecified at optimization
                 time. MPQ generalizes parametric query optimization
                 (which allows multiple parameters but only one cost
                 metric) and multi-objective query optimization (which
                 allows multiple cost metrics but no parameters). We
                 formally analyze the novel MPQ problem and show why
                 existing algorithms are inapplicable. We present a
                 generic algorithm for MPQ and a specialized version for
                 MPQ with piecewise-linear plan cost functions. We prove
                 that both algorithms find all relevant query plans and
                 experimentally evaluate the performance of our second
                 algorithm in a Cloud computing scenario.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Giceva:2014:DQP,
  author =       "Jana Giceva and Gustavo Alonso and Timothy Roscoe and
                 Tim Harris",
  title =        "Deployment of query plans on multicores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "233--244",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Efficient resource scheduling of multithreaded
                 software on multicore hardware is difficult given the
                 many parameters involved and the hardware heterogeneity
                 of existing systems. In this paper we explore the
                 efficient deployment of query plans over a multicore
                 machine. We focus on shared query systems, and
                 implement the proposed ideas using SharedDB. The goal
                 of the paper is to explore how to deliver maximum
                 performance and predictability, while minimizing
                 resource utilization when deploying query plans on
                 multicore machines. We propose to use resource activity
                 vectors to characterize the behavior of individual
                 database operators. We then present a novel deployment
                 algorithm which uses these vectors together with
                 dataflow information from the query plan to optimally
                 assign relational operators to physical cores.
                 Experiments demonstrate that this approach
                 significantly reduces resource requirements while
                 preserving performance and is robust across different
                 server architectures.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Taft:2014:SFG,
  author =       "Rebecca Taft and Essam Mansour and Marco Serafini and
                 Jennie Duggan and Aaron J. Elmore and Ashraf Aboulnaga
                 and Andrew Pavlo and Michael Stonebraker",
  title =        "{E-Store}: fine-grained elastic partitioning for
                 distributed transaction processing systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "245--256",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "On-line transaction processing (OLTP) database
                 management systems (DBMSs) often serve time-varying
                 workloads due to daily, weekly or seasonal fluctuations
                 in demand, or because of rapid growth in demand due to
                 a company's business success. In addition, many OLTP
                 workloads are heavily skewed to ``hot'' tuples or
                 ranges of tuples. For example, the majority of NYSE
                 volume involves only 40 stocks. To deal with such
                 fluctuations, an OLTP DBMS needs to be elastic; that
                 is, it must be able to expand and contract resources in
                 response to load fluctuations and dynamically balance
                 load as hot tuples vary over time. This paper presents
                 E-Store, an elastic partitioning framework for
                 distributed OLTP DBMSs. It automatically scales
                 resources in response to demand spikes, periodic
                 events, and gradual changes in an application's
                 workload. E-Store addresses localized bottlenecks
                 through a two-tier data placement strategy: cold data
                 is distributed in large chunks, while smaller ranges of
                 hot tuples are assigned explicitly to individual nodes.
                 This is in contrast to traditional single-tier hash and
                 range partitioning strategies. Our experimental
                 evaluation of E-Store shows the viability of our
                 approach and its efficacy under variations in load
                 across a cluster of machines. Compared to single-tier
                 approaches, E-Store improves throughput by up to 130\%
                 while reducing latency by 80\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Thirumuruganathan:2014:BIM,
  author =       "Saravanan Thirumuruganathan and Habibur Rahman and
                 Sofiane Abbar and Gautam Das",
  title =        "Beyond itemsets: mining frequent featuresets over
                 structured items",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "257--268",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We assume a dataset of transactions generated by a set
                 of users over structured items where each item could be
                 described through a set of features. In this paper, we
                 are interested in identifying the frequent featuresets
                 (set of features) by mining item transactions. For
                 example, in a news website, items correspond to news
                 articles, the features are the named-entities/topics in
                 the articles and an item transaction would be the set
                 of news articles read by a user within the same
                 session. We show that mining frequent featuresets over
                 structured item transactions is a novel problem and
                 show that straightforward extensions of existing
                 frequent itemset mining techniques provide
                 unsatisfactory results. This is due to the fact that
                 while users are drawn to each item in the transaction
                 due to a subset of its features, the transaction by
                 itself does not provide any information about such
                 underlying preferred features of users. In order to
                 overcome this hurdle, we propose a featureset
                 uncertainty model where each item transaction could
                 have been generated by various featuresets with
                 different probabilities. We describe a novel approach
                 to transform item transactions into uncertain
                 transaction over featuresets and estimate their
                 probabilities using constrained least squares based
                 approach. We propose diverse algorithms to mine
                 frequent featuresets. Our experimental evaluation
                 provides a comparative analysis of the different
                 approaches proposed.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2014:ICD,
  author =       "Jun Zhang and Chaokun Wang and Jianmin Wang and
                 Jeffrey Xu Yu",
  title =        "Inferring continuous dynamic social influence and
                 personal preference for temporal behavior prediction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "269--280",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "It is always attractive and challenging to explore the
                 intricate behavior data and uncover people's
                 motivations, preference and habits, which can greatly
                 benefit many tasks including link prediction, item
                 recommendation, etc. Traditional work usually studies
                 people's behaviors without time information in a static
                 or discrete manner, assuming the underlying factors
                 stay invariant in a long period. However, we believe
                 people's behaviors are dynamic, and the contributing
                 factors including the social influence and personal
                 preference for behaviors are varying continuously over
                 time. Such continuous dynamics convey important
                 knowledge about people's behavior patterns; ignoring
                 them would lead to inaccurate models. In this work, we
                 address the continuous dynamic modeling of temporal
                 behaviors. To model the fully continuous temporal
                 dynamics of behaviors and the underlying factors, we
                 propose the DP-Space, a dynamic preference probability
                 space, which can capture their smooth variation in
                 various shapes over time with flexible basis functions.
                 Upon that we propose a generative dynamic behavior
                 model, ConTyor, which considers the temporal
                 item-adoption behaviors as joint effect of dynamic
                 social influence and varying personal preference over
                 continuous time. We also develop effective inference
                 methods for ConTyor and present its applications. We
                 conduct a comprehensive experimental study using
                 real-world datasets to evaluate the effectiveness of
                 our model and the temporal modeling. Results verify
                 that ConTyor outperforms existing state-of-the-art
                 static and temporal models in behavior predictions.
                 Moreover, in our detailed study on temporal modeling,
                 we show that temporal modeling is superior to static
                 approaches and modeling over continuous time is further
                 better than that over discrete time. We also
                 demonstrate that the ancient behavior data can still
                 become important and beneficial if modeled well.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2014:LSD,
  author =       "Yi Lu and James Cheng and Da Yan and Huanhuan Wu",
  title =        "Large-scale distributed graph computing systems: an
                 experimental evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "281--292",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the prevalence of graph data in real-world
                 applications (e.g., social networks, mobile phone
                 networks, web graphs, etc.) and their ever-increasing
                 size, many distributed graph computing systems have
                 been developed in recent years to process and analyze
                 massive graphs. Most of these systems adopt Pregel's
                 vertex-centric computing model, while various
                 techniques have been proposed to address the
                 limitations in the Pregel framework. However, there is
                 a lack of comprehensive comparative analysis to
                 evaluate the performance of various systems and their
                 techniques, making it difficult for users to choose the
                 best system for their applications. We conduct
                 extensive experiments to evaluate the performance of
                 existing systems on graphs with different
                 characteristics and on algorithms with different design
                 logic. We also study the effectiveness of various
                 techniques adopted in existing systems, and the
                 scalability of the systems. The results of our study
                 reveal the strengths and limitations of existing
                 systems, and provide valuable insights for users,
                 researchers and system developers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Inoue:2014:FSI,
  author =       "Hiroshi Inoue and Moriyoshi Ohara and Kenjiro Taura",
  title =        "Faster set intersection with {SIMD} instructions by
                 reducing branch mispredictions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "293--304",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Set intersection is one of the most important
                 operations for many applications such as Web search
                 engines or database management systems. This paper
                 describes our new algorithm to efficiently find set
                 intersections with sorted arrays on modern processors
                 with SIMD instructions and high branch misprediction
                 penalties. Our algorithm efficiently exploits SIMD
                 instructions and can drastically reduce branch
                 mispredictions. Our algorithm extends a merge-based
                 algorithm by reading multiple elements, instead of just
                 one element, from each of two input arrays and compares
                 all of the pairs of elements from the two arrays to
                 find the elements with the same values. The key insight
                 for our improvement is that we can reduce the number of
                 costly hard-to-predict conditional branches by
                 advancing a pointer by more than one element at a time.
                 Although this algorithm increases the total number of
                 comparisons, we can execute these comparisons more
                 efficiently using the SIMD instructions and gain the
                 benefits of the reduced branch misprediction overhead.
                 Our algorithm is suitable to replace existing standard
                 library functions, such as {\tt std::set\_intersection}
                 in C++, thus accelerating many applications, because
                 the algorithm is simple and requires no preprocessing
                 to generate additional data structures. We implemented
                 our algorithm on Xeon and POWER7+. The experimental
                 results show our algorithm outperforms the {\tt
                 std::set\_intersection} implementation delivered with
                 gcc by up to 5.2x using SIMD instructions and by up to
                 2.1x even without using SIMD instructions for 32-bit
                 and 64-bit integer datasets. Our SIMD algorithm also
                 outperformed an existing algorithm that can leverage
                 SIMD instructions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{El-Kishky:2014:STP,
  author =       "Ahmed El-Kishky and Yanglei Song and Chi Wang and
                 Clare R. Voss and Jiawei Han",
  title =        "Scalable topical phrase mining from text corpora",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "305--316",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While most topic modeling algorithms model text
                 corpora with unigrams, human interpretation often
                 relies on inherent grouping of terms into phrases. As
                 such, we consider the problem of discovering topical
                 phrases of mixed lengths. Existing work either performs
                 post processing to the results of unigram-based topic
                 models, or utilizes complex $n$-gram-discovery topic
                 models. These methods generally produce low-quality
                 topical phrases or suffer from poor scalability on even
                 moderately-sized datasets. We propose a different
                 approach that is both computationally efficient and
                 effective. Our solution combines a novel phrase mining
                 framework to segment a document into single and
                 multi-word phrases, and a new topic model that operates
                 on the induced document partition. Our approach
                 discovers high quality topical phrases with negligible
                 extra cost to the bag-of-words topic model in a variety
                 of datasets including research publication titles,
                 abstracts, reviews, and news articles.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tao:2014:ETK,
  author =       "Wenbo Tao and Minghe Yu and Guoliang Li",
  title =        "Efficient top-$k$ {SimRank}-based similarity join",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "3",
  pages =        "317--328",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:34 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SimRank is a popular and widely-adopted similarity
                 measure to evaluate the similarity between nodes in a
                 graph. It is time and space consuming to compute the
                 SimRank similarities for all pairs of nodes, especially
                 for large graphs. In real-world applications, users are
                 only interested in the most similar pairs. To address
                 this problem, in this paper we study the top-$k$
                 SimRank-based similarity join problem, which finds $k$
                 most similar pairs of nodes with the largest SimRank
                 similarities among all possible pairs. To the best of
                 our knowledge, this is the first attempt to address
                 this problem. We encode each node as a vector by
                 summarizing its neighbors and transform the calculation
                 of the SimRank similarity between two nodes to
                 computing the dot product between the corresponding
                 vectors. We devise an efficient two-step framework to
                 compute top-$k$ similar pairs using the vectors. For
                 large graphs, exact algorithms cannot meet the
                 high-performance requirement, and we also devise an
                 approximate algorithm which can efficiently identify
                 top-$k$ similar pairs under user-specified accuracy
                 requirement. Experiments on both real and synthetic
                 datasets show our method achieves high performance and
                 good scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2014:CQC,
  author =       "Jiong He and Shuhao Zhang and Bingsheng He",
  title =        "In-cache query co-processing on coupled {CPU--GPU}
                 architectures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "329--340",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, there have been some emerging processor
                 designs that the CPU and the GPU (Graphics Processing
                 Unit) are integrated in a single chip and share Last
                 Level Cache (LLC). However, the main memory bandwidth
                 of such coupled CPU-GPU architectures can be much lower
                 than that of a discrete GPU. As a result, current GPU
                 query co-processing paradigms can severely suffer from
                 memory stalls. In this paper, we propose a novel
                 in-cache query co-processing paradigm for main memory
                 On-Line Analytical Processing (OLAP) databases on
                 coupled CPU-GPU architectures. Specifically, we adapt
                 CPU-assisted prefetching to minimize cache misses in
                 GPU query co-processing and CPU-assisted decompression
                 to improve query execution performance. Furthermore, we
                 develop a cost model guided adaptation mechanism for
                 distributing the workload of prefetching,
                 decompression, and query execution between CPU and GPU.
                 We implement a system prototype and evaluate it on two
                 recent AMD APUs A8 and A10. The experimental results
                 show that (1) in-cache query co-processing can
                 effectively improve the performance of the
                 state-of-the-art GPU co-processing paradigm by up to
                 30\% and 33\% on A8 and A10, respectively, and (2) our
                 workload distribution adaption mechanism can
                 significantly improve the query performance by up to
                 36\% and 40\% on A8 and A10, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fujiwara:2014:SMR,
  author =       "Yasuhiro Fujiwara and Go Irie and Shari Kuroyama and
                 Makoto Onizuka",
  title =        "Scaling manifold ranking based image retrieval",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "341--352",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Manifold Ranking is a graph-based ranking algorithm
                 being successfully applied to retrieve images from
                 multimedia databases. Given a query image, Manifold
                 Ranking computes the ranking scores of images in the
                 database by exploiting the relationships among them
                 expressed in the form of a graph. Since Manifold
                 Ranking effectively utilizes the global structure of
                 the graph, it is significantly better at finding
                 intuitive results compared with current approaches.
                 Fundamentally, Manifold Ranking requires an inverse
                 matrix to compute ranking scores and so needs $ O(n^3)
                 $ time, where $n$ is the number of images. Manifold
                 Ranking, unfortunately, does not scale to support
                 databases with large numbers of images. Our solution,
                 Mogul, is based on two ideas: (1) It efficiently
                 computes ranking scores by sparse matrices, and (2) It
                 skips unnecessary score computations by estimating
                 upper bounding scores. These two ideas reduce the time
                 complexity of Mogul to $ O(n)$ from $ O(n^3)$ of the
                 inverse matrix approach. Experiments show that Mogul is
                 much faster and gives significantly better retrieval
                 quality than a state-of-the-art approximation
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Barber:2014:MEH,
  author =       "R. Barber and G. Lohman and I. Pandis and V. Raman and
                 R. Sidle and G. Attaluri and N. Chainani and S.
                 Lightstone and D. Sharpe",
  title =        "Memory-efficient hash joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "353--364",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present new hash tables for joins, and a hash join
                 based on them, that consumes far less memory and is
                 usually faster than recently published in-memory joins.
                 Our hash join is not restricted to outer tables that
                 fit wholly in memory. Key to this hash join is a new
                 concise hash table (CHT), a linear probing hash table
                 that has 100\% fill factor, and uses a sparse bitmap
                 with embedded population counts to almost entirely
                 avoid collisions. This bitmap also serves as a Bloom
                 filter for use in multi-table joins. We study the
                 random access characteristics of hash joins, and renew
                 the case for non-partitioned hash joins. We introduce a
                 variant of partitioned joins in which only the build is
                 partitioned, but the probe is not, as this is more
                 efficient for large outer tables than traditional
                 partitioned joins. This also avoids partitioning costs
                 during the probe, while at the same time allowing
                 parallel build without latching overheads.
                 Additionally, we present a variant of CHT, called a
                 concise array table (CAT), that can be used when the
                 key domain is moderately dense. CAT is collision-free
                 and avoids storing join keys in the hash table. We
                 perform a detailed comparison of CHT and CAT against
                 leading in-memory hash joins. Our experiments show that
                 we can reduce the memory usage by one to three orders
                 of magnitude, while also being competitive in
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alexe:2014:PAI,
  author =       "Bogdan Alexe and Mary Roth and Wang-Chiew Tan",
  title =        "Preference-aware integration of temporal data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "365--376",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A complete description of an entity is rarely
                 contained in a single data source, but rather, it is
                 often distributed across different data sources.
                 Applications based on personal electronic health
                 records, sentiment analysis, and financial records all
                 illustrate that significant value can be derived from
                 integrated, consistent, and queryable profiles of
                 entities from different sources. Even more so, such
                 integrated profiles are considerably enhanced if
                 temporal information from different sources is
                 carefully accounted for. We develop a simple and yet
                 versatile operator, called prawn, that is typically
                 called as a final step of an entity integration
                 workflow. Prawn is capable of consistently integrating
                 and resolving temporal conflicts in data that may
                 contain multiple dimensions of time based on a set of
                 preference rules specified by a user (hence the name
                 prawn for preference-aware union). In the event that
                 not all conflicts can be resolved through preferences,
                 one can enumerate each possible consistent
                 interpretation of the result returned by prawn at a
                 given time point through a polynomial-delay algorithm.
                 In addition to providing algorithms for implementing
                 prawn, we study and establish several desirable
                 properties of prawn. First, prawn produces the same
                 temporally integrated outcome, modulo representation of
                 time, regardless of the order in which data sources are
                 integrated. Second, prawn can be customized to
                 integrate temporal data for different applications by
                 specifying application-specific preference rules.
                 Third, we show experimentally that our implementation
                 of prawn is feasible on both ``small'' and ``big'' data
                 platforms in that it is efficient in both storage and
                 execution time. Finally, we demonstrate a fundamental
                 advantage of prawn: we illustrate that standard query
                 languages can be immediately used to pose useful
                 temporal queries over the integrated and resolved
                 entity repository.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2014:MSD,
  author =       "Chang Zhou and Jun Gao and Binbin Sun and Jeffrey Xu
                 Yu",
  title =        "{MOCgraph}: scalable distributed graph processing
                 using message online computing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "377--388",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Existing distributed graph processing frameworks,
                 e.g., Pregel, Giraph, GPS and GraphLab, mainly exploit
                 main memory to support flexible graph operations for
                 efficiency. Due to the complexity of graph analytics,
                 huge memory space is required especially for those
                 graph analytics that spawn large intermediate results.
                 Existing frameworks may terminate abnormally or degrade
                 performance seriously when the memory is exhausted or
                 the external storage has to be used. In this paper, we
                 propose MOCgraph, a scalable distributed graph
                 processing framework to reduce the memory footprint and
                 improve the scalability, based on message online
                 computing. MOCgraph consumes incoming messages in a
                 streaming manner, so as to handle larger graphs or more
                 complex analytics with the same memory capacity.
                 MOCgraph also exploits message online computing with
                 external storage to provide an efficient out-of-core
                 support. We implement MOCgraph on top of Apache Giraph,
                 and test it against several representative graph
                 algorithms on large graph datasets. Experiments
                 illustrate that MOCgraph is efficient and
                 memory-saving, especially for graph analytics with
                 large intermediate results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2014:NAL,
  author =       "Jian Huang and Karsten Schwan and Moinuddin K.
                 Qureshi",
  title =        "{NVRAM-aware} logging in transaction systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "389--400",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Emerging byte-addressable, non-volatile memory
                 technologies (NVRAM) like phase-change memory can
                 increase the capacity of future memory systems by
                 orders of magnitude. Compared to systems that rely on
                 disk storage, NVRAM-based systems promise significant
                 improvements in performance for key applications like
                 online transaction processing (OLTP). Unfortunately,
                 NVRAM systems suffer from two drawbacks: their
                 asymmetric read-write performance and the notable
                 higher cost of the new memory technologies compared to
                 disk. This paper investigates the cost-effective use of
                 NVRAM in transaction systems. It shows that using NVRAM
                 only for the logging subsystem (NV-Logging) provides
                 much higher transactions per dollar than simply
                 replacing all disk storage with NVRAM. Specifically,
                 for NV-Logging, we show that the software overheads
                 associated with centralized log buffers cause
                 performance bottlenecks and limit scaling. The
                 per-transaction logging methods described in the paper
                 help avoid these overheads, enabling concurrent logging
                 for multiple transactions. Experimental results with a
                 faithful emulation of future NVRAM-based servers using
                 the TPCC, TATP, and TPCB benchmarks show that
                  NV-Logging improves throughput by 1.42--2.72x over
                 the costlier option of replacing all disk storage with
                  NVRAM. Results also show that NV-Logging performs
                  1.21--6.71x better than when logs are placed into the
                 PMFS NVRAM-optimized file system. Compared to
                 state-of-the-art distributed logging, NV-Logging
                 delivers 20.4\% throughput improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2014:THP,
  author =       "Badrish Chandramouli and Jonathan Goldstein and Mike
                 Barnett and Robert DeLine and Danyel Fisher and John C.
                 Platt and James F. Terwilliger and John Wernsing",
  title =        "{Trill}: a high-performance incremental query processor
                 for diverse analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "401--412",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper introduces Trill --- a new query processor
                 for analytics. Trill fulfills a combination of three
                 requirements for a query processor to serve the diverse
                 big data analytics space: (1) Query Model: Trill is
                 based on a tempo-relational model that enables it to
                 handle streaming and relational queries with early
                 results, across the latency spectrum from real-time to
                 offline; (2) Fabric and Language Integration: Trill is
                 architected as a high-level language library that
                 supports rich data-types and user libraries, and
                 integrates well with existing distribution fabrics and
                 applications; and (3) Performance: Trill's throughput
                 is high across the latency spectrum. For streaming
                 data, Trill's throughput is 2--4 orders of magnitude
                 higher than comparable streaming engines. For offline
                 relational queries, Trill's throughput is comparable to
                 a major modern commercial columnar DBMS. Trill uses a
                 streaming batched-columnar data representation with a
                 new dynamic compilation-based system architecture that
                 addresses all these requirements. In this paper, we
                 describe Trill's new design and architecture, and
                 report experimental results that demonstrate Trill's
                 high performance across diverse analytics scenarios. We
                 also describe how Trill's ability to support diverse
                 analytics has resulted in its adoption across many
                 usage scenarios at Microsoft.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Song:2014:EPM,
  author =       "Chunyao Song and Tingjian Ge and Cindy Chen and Jie
                 Wang",
  title =        "Event pattern matching over graph streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "413--424",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A graph is a fundamental and general data structure
                 underlying all data applications. Many applications
                 today call for the management and query capabilities
                 directly on graphs. Real time graph streams, as seen in
                 road networks, social and communication networks, and
                 web requests, are such applications. Event pattern
                 matching requires the awareness of graph structures,
                 which is different from traditional complex event
                 processing. It also requires a focus on the dynamicity
                 of the graph, time order constraints in patterns, and
                 online query processing, which deviates significantly
                 from previous work on subgraph matching as well. We
                 study the semantics and efficient online algorithms for
                 this important and intriguing problem, and evaluate our
                 approaches with extensive experiments over real world
                 datasets in four different domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2014:CAA,
  author =       "Qi Li and Yaliang Li and Jing Gao and Lu Su and Bo
                 Zhao and Murat Demirbas and Wei Fan and Jiawei Han",
  title =        "A confidence-aware approach for truth discovery on
                 long-tail data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "425--436",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In many real world applications, the same item may be
                 described by multiple sources. As a consequence,
                 conflicts among these sources are inevitable, which
                 leads to an important task: how to identify which piece
                 of information is trustworthy, i.e., the truth
                 discovery task. Intuitively, if the piece of
                 information is from a reliable source, then it is more
                 trustworthy, and the source that provides trustworthy
                 information is more reliable. Based on this principle,
                 truth discovery approaches have been proposed to infer
                 source reliability degrees and the most trustworthy
                 information (i.e., the truth) simultaneously. However,
                 existing approaches overlook the ubiquitous long-tail
                 phenomenon in the tasks, i.e., most sources only
                 provide a few claims and only a few sources make plenty
                 of claims, which causes the source reliability
                 estimation for small sources to be unreasonable. To
                 tackle this challenge, we propose a confidence-aware
                 truth discovery (CATD) method to automatically detect
                 truths from conflicting data with long-tail phenomenon.
                 The proposed method not only estimates source
                 reliability, but also considers the confidence interval
                 of the estimation, so that it can effectively reflect
                 real source reliability for sources with various levels
                 of participation. Experiments on four real world tasks
                 as well as simulated multi-source long-tail datasets
                 demonstrate that the proposed method outperforms
                 existing state-of-the-art truth discovery approaches by
                  successfully discounting the effect of small sources.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shen:2014:FFR,
  author =       "Yanyan Shen and Gang Chen and H. V. Jagadish and Wei
                 Lu and Beng Chin Ooi and Bogdan Marius Tudor",
  title =        "Fast failure recovery in distributed graph processing
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "437--448",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed graph processing systems increasingly
                 require many compute nodes to cope with the
                 requirements imposed by contemporary graph-based Big
                 Data applications. However, increasing the number of
                 compute nodes increases the chance of node failures.
                 Therefore, provisioning an efficient failure recovery
                 strategy is critical for distributed graph processing
                 systems. This paper proposes a novel recovery mechanism
                 for distributed graph processing systems that
                 parallelizes the recovery process. The key idea is to
                 partition the part of the graph that is lost during a
                 failure among a subset of the remaining nodes. To do
                 so, we augment the existing checkpoint-based and
                 log-based recovery schemes with a partitioning
                 mechanism that is sensitive to the total computation
                 and communication cost of the recovery process. Our
                 implementation on top of the widely used Giraph system
                 outperforms checkpoint-based recovery by up to 30x on a
                 cluster of 40 compute nodes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Then:2014:MME,
  author =       "Manuel Then and Moritz Kaufmann and Fernando Chirigati
                 and Tuan-Anh Hoang-Vu and Kien Pham and Alfons Kemper
                 and Thomas Neumann and Huy T. Vo",
  title =        "The more the merrier: efficient multi-source graph
                 traversal",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "4",
  pages =        "449--460",
  month =        dec,
  year =         "2014",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph analytics on social networks, Web data, and
                 communication networks has been widely used in a
                 plethora of applications. Many graph analytics
                 algorithms are based on breadth-first search (BFS)
                 graph traversal, which is not only time-consuming for
                 large datasets but also involves much redundant
                 computation when executed multiple times from different
                 start vertices. In this paper, we propose Multi-Source
                 BFS (MS-BFS), an algorithm that is designed to run
                 multiple concurrent BFSs over the same graph on a
                 single CPU core while scaling up as the number of cores
                 increases. MS-BFS leverages the properties of
                 small-world networks, which apply to many real-world
                 graphs, and enables efficient graph traversal that: (i)
                 shares common computation across concurrent BFSs; (ii)
                 greatly reduces the number of random memory accesses;
                 and (iii) does not incur synchronization costs. We
                 demonstrate how a real graph analytics
                 application---all-vertices closeness centrality---can
                 be efficiently solved with MS-BFS. Furthermore, we
                 present an extensive experimental evaluation with both
                 synthetic and real datasets, including Twitter and
                 Wikipedia, showing that MS-BFS provides almost linear
                 scalability with respect to the number of cores and
                 excellent scalability for increasing graph sizes,
                 outperforming state-of-the-art BFS algorithms by more
                 than one order of magnitude when running a large number
                 of BFSs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wandelt:2015:MCS,
  author =       "Sebastian Wandelt and Ulf Leser",
  title =        "{MRCSI}: compressing and searching string collections
                 with multiple references",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "461--472",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Efficiently storing and searching collections of
                 similar strings, such as large populations of genomes
                 or long change histories of documents from Wikis, is a
                 timely and challenging problem. Several recent
                 proposals could drastically reduce space requirements
                 by exploiting the similarity between strings in
                 so-called reference-based compression. However, these
                 indexes are usually not searchable any more, i.e., in
                 these methods search efficiency is sacrificed for
                 storage efficiency. We propose Multi-Reference
                 Compressed Search Indexes (MRCSI) as a framework for
                 efficiently compressing dissimilar string collections.
                 In contrast to previous works which can use only a
                 single reference for compression, MRCSI (a) uses
                 multiple references for achieving increased compression
                 rates, where the reference set need not be specified by
                 the user but is determined automatically, and (b)
                 supports efficient approximate string searching with
                 edit distance constraints. We prove that finding the
                 smallest MRCSI is NP-hard. We then propose three
                 heuristics for computing MRCSIs achieving increasing
                 compression ratios. Compared to state-of-the-art
                 competitors, our methods target an interesting and
                 novel sweet-spot between high compression ratio versus
                 search efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2015:YFC,
  author =       "Rui Ding and Qiang Wang and Yingnong Dang and Qiang Fu
                 and Haidong Zhang and Dongmei Zhang",
  title =        "{YADING}: fast clustering of large-scale time series
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "473--484",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Fast and scalable analysis techniques are becoming
                 increasingly important in the era of big data, because
                 they are the enabling techniques to create real-time
                 and interactive experiences in data analysis. Time
                 series are widely available in diverse application
                 areas. Due to the large number of time series instances
                 (e.g., millions) and the high dimensionality of each
                 time series instance (e.g., thousands), it is
                 challenging to conduct clustering on large-scale time
                 series, and it is even more challenging to do so in
                 real-time to support interactive exploration. In this
                 paper, we propose a novel end-to-end time series
                 clustering algorithm, YADING, which automatically
                 clusters large-scale time series with fast performance
                 and quality results. Specifically, YADING consists of
                 three steps: sampling the input dataset, conducting
                 clustering on the sampled dataset, and assigning the
                 rest of the input data to the clusters generated on the
                 sampled dataset. In particular, we provide theoretical
                 proof on the lower and upper bounds of the sample size,
                 which not only guarantees YADING's high performance,
                 but also ensures the distribution consistency between
                 the input dataset and the sampled dataset. We also
                 select $ L_1 $ norm as similarity measure and the
                 multi-density approach as the clustering method. With
                 theoretical bound, this selection ensures YADING's
                 robustness to time series variations due to phase
                 perturbation and random noise. Evaluation results have
                 demonstrated that on typical-scale (100,000 time series
                 each with 1,000 dimensions) datasets, YADING is about
                 40 times faster than the state-of-the-art,
                 sampling-based clustering algorithm DENCLUE 2.0, and
                 about 1,000 times faster than DBSCAN and CLARANS.
                 YADING has also been used by product teams at Microsoft
                 to analyze service performance. Two of such use cases
                 are shared in this paper.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2015:HWS,
  author =       "Ting Wu and Lei Chen and Pan Hui and Chen Jason Zhang
                 and Weikai Li",
  title =        "Hear the whole story: towards the diversity of opinion
                 in crowdsourcing markets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "485--496",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The recent surge in popularity of crowdsourcing has
                 brought with it a new opportunity for engaging human
                 intelligence in the process of data analysis.
                 Crowdsourcing provides a fundamental mechanism for
                 enabling online workers to participate in tasks that
                 are either too difficult to be solved solely by a
                 computer or too expensive to employ experts to perform.
                 In the field of social science, four elements are
                 required to form a wise crowd --- Diversity of Opinion,
                 Independence, Decentralization and Aggregation.
                 However, while the other three elements are already
                 studied and implemented in current crowdsourcing
                  platforms, the `Diversity of Opinion' has not been
                 functionally enabled. In this paper, we address the
                 algorithmic optimizations towards the diversity of
                 opinion of crowdsourcing marketplaces. From a
                 computational perspective, in order to build a wise
                  crowd, we need to quantitatively model the
                 diversity, and take it into consideration for
                 constructing the crowd. In a crowdsourcing marketplace,
                 we usually encounter two basic paradigms for worker
                 selection: building a crowd to wait for tasks to come
                 and selecting workers for a given task. Therefore, we
                 propose our Similarity-driven Model (S-Model) and
                 Task-driven Model (T-Model) for both of the paradigms.
                 Under both of the models, we propose efficient and
                 effective algorithms to enlist a budgeted number of
                 workers, which have the optimal diversity. We have
                 verified our solutions with extensive experiments on
                 both synthetic datasets and real data sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chatzistergiou:2015:RUR,
  author =       "Andreas Chatzistergiou and Marcelo Cintra and Stratis
                 D. Viglas",
  title =        "{REWIND}: recovery write-ahead system for in-memory
                 non-volatile data-structures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "497--508",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent non-volatile memory (NVM) technologies, such as
                 PCM, STT-MRAM and ReRAM, can act as both main memory
                 and storage. This has led to research into NVM
                 programming models, where persistent data structures
                 remain in memory and are accessed directly through CPU
                 loads and stores. Existing mechanisms for transactional
                 updates are not appropriate in such a setting as they
                 are optimized for block-based storage. We present
                 REWIND, a user-mode library approach to managing
                 transactional updates directly from user code written
                 in an imperative general-purpose language. REWIND
                 relies on a custom persistent in-memory data structure
                 for the log that supports recoverable operations on
                 itself. The scheme also employs a combination of
                 non-temporal updates, persistent memory fences, and
                 lightweight logging. Experimental results on synthetic
                 transactional workloads and TPC-C show the overhead of
                 REWIND compared to its non-recoverable equivalent to be
                 within a factor of only 1.5 and 1.39 respectively.
                 Moreover, REWIND outperforms state-of-the-art
                 approaches for data structure recoverability as well as
                 general purpose and NVM-aware DBMS-based recovery
                 schemes by up to two orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2015:ICS,
  author =       "Rong-Hua Li and Lu Qin and Jeffrey Xu Yu and Rui Mao",
  title =        "Influential community search in large networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "509--520",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Community search is a problem of finding densely
                 connected subgraphs that satisfy the query conditions
                 in a network, which has attracted much attention in
                 recent years. However, all the previous studies on
                 community search do not consider the influence of a
                 community. In this paper, we introduce a novel
                 community model called $k$-influential community based
                 on the concept of $k$-core, which can capture the
                 influence of a community. Based on the new community
                 model, we propose a linear-time online search algorithm
                 to find the top-$r$ $k$-influential communities in a
                 network. To further speed up the influential community
                 search algorithm, we devise a linear-space index
                 structure which supports efficient search of the
                 top-$r$ $k$-influential communities in optimal time. We
                 also propose an efficient algorithm to maintain the
                 index when the network is frequently updated. We
                 conduct extensive experiments on 7 real-world large
                 networks, and the results demonstrate the efficiency
                 and effectiveness of the proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kim:2015:RSV,
  author =       "Albert Kim and Eric Blais and Aditya Parameswaran and
                 Piotr Indyk and Sam Madden and Ronitt Rubinfeld",
  title =        "Rapid sampling for visualizations with ordering
                 guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "521--532",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Visualizations are frequently used as a means to
                 understand trends and gather insights from datasets,
                 but often take a long time to generate. In this paper,
                 we focus on the problem of rapidly generating
                 approximate visualizations while preserving crucial
                 visual properties of interest to analysts. Our primary
                 focus will be on sampling algorithms that preserve the
                 visual property of ordering; our techniques will also
                 apply to some other visual properties. For instance,
                 our algorithms can be used to generate an approximate
                 visualization of a bar chart very rapidly, where the
                 comparisons between any two bars are correct. We
                 formally show that our sampling algorithms are
                 generally applicable and provably optimal in theory, in
                 that they do not take more samples than necessary to
                 generate the visualizations with ordering guarantees.
                 They also work well in practice, correctly ordering
                 output groups while taking orders of magnitude fewer
                 samples and much less time than conventional sampling
                 schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chang:2015:OEE,
  author =       "Lijun Chang and Xuemin Lin and Wenjie Zhang and
                 Jeffrey Xu Yu and Ying Zhang and Lu Qin",
  title =        "Optimal enumeration: efficient top-$k$ tree matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "533--544",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Driven by many real applications, graph pattern
                 matching has attracted a great deal of attention
                 recently. Consider that a twig-pattern matching may
                 result in an extremely large number of matches in a
                 graph; this may not only confuse users by providing too
                 many results but also lead to high computational costs.
                 In this paper, we study the problem of top-$k$ tree
                 pattern matching; that is, given a rooted tree $T$,
                 compute its top-$k$ matches in a directed graph $G$
                 based on the twig-pattern matching semantics. We
                 firstly present a novel and optimal enumeration
                 paradigm based on the principle of Lawler's procedure.
                 We show that our enumeration algorithm runs in $ O(n_T
                 + \log k)$ time in each round where $ n_T$ is the
                 number of nodes in $T$. Considering that the time
                 complexity to output a match of $T$ is $ O(n_T)$ and $
                 n_T \geq \log k$ in practice, our enumeration technique
                 is optimal. Moreover, the cost of generating top-$1$
                 match of $T$ in our algorithm is $ O(m_R)$ where $ m_R$
                 is the number of edges in the transitive closure of a
                 data graph $G$ involving all relevant nodes to $T$. $
                 O(m_R)$ is also optimal in the worst case without
                 pre-knowledge of $G$. Consequently, our algorithm is
                 optimal with the running time $ O(m_R + k(n_T + \log
                 k))$ in contrast to the time complexity $ O(m_R \log k
                 + k n_T (\log k + d_T))$ of the existing technique
                 where $ d_T$ is the maximal node degree in $T$.
                 Secondly, a novel priority based access technique is
                 proposed, which greatly reduces the number of edges
                 accessed and results in a significant performance
                 improvement. Finally, we apply our techniques to the
                 general form of top-$k$ graph pattern matching problem
                 (i.e., query is a graph) to improve the existing
                 techniques. Comprehensive empirical studies demonstrate
                 that our techniques may improve the existing techniques
                 by orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lazerson:2015:MDS,
  author =       "Arnon Lazerson and Izchak Sharfman and Daniel Keren
                 and Assaf Schuster and Minos Garofalakis and Vasilis
                 Samoladas",
  title =        "Monitoring distributed streams using convex
                 decompositions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "545--556",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Emerging large-scale monitoring applications rely on
                 continuous tracking of complex data-analysis queries
                 over collections of massive, physically-distributed
                 data streams. Thus, in addition to the space- and
                 time-efficiency requirements of conventional stream
                 processing (at each remote monitor site), effective
                 solutions also need to guarantee communication
                 efficiency (over the underlying communication network).
                 The complexity of the monitored query adds to the
                 difficulty of the problem --- this is especially true
                 for non-linear queries (e.g., joins), where no obvious
                 solutions exist for distributing the monitored
                 condition across sites. The recently proposed geometric
                 method, based on the notion of covering spheres, offers
                 a generic methodology for splitting an arbitrary
                 (non-linear) global condition into a collection of
                 local site constraints, and has been applied to massive
                 distributed stream-monitoring tasks, achieving
                 state-of-the-art performance. In this paper, we present
                 a far more general geometric approach, based on the
                 convex decomposition of an appropriate subset of the
                 domain of the monitoring query, and formally prove that
                 it is always guaranteed to perform at least as good as
                 the covering spheres method. We analyze our approach
                 and demonstrate its effectiveness for the important
                 case of sketch-based approximate tracking for norm,
                 range-aggregate, and join-aggregate queries, which have
                 numerous applications in streaming data analysis.
                 Experimental results on real-life data streams verify
                 the superiority of our approach in practical settings,
                 showing that it substantially outperforms the covering
                 spheres method.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2015:UGD,
  author =       "Kun Li and Daisy Zhe Wang and Alin Dobra and
                 Christopher Dudley",
  title =        "{UDA}-{GIST}: an in-database framework to unify
                 data-parallel and state-parallel analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "557--568",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Enterprise applications need sophisticated in-database
                 analytics in addition to traditional online analytical
                 processing from a database. To meet customers' pressing
                 demands, database vendors have been pushing advanced
                 analytical techniques into databases. Most major DBMSes
                 offer User-Defined Aggregate (UDA), a data-driven
                 operator, to implement many of the analytical
                 techniques in parallel. However, UDAs can not be used
                 to implement statistical algorithms such as Markov
                 chain Monte Carlo (MCMC), where most of the work is
                 performed by iterative transitions over a large state
                 that can not be naively partitioned due to data
                 dependency. Typically, this type of statistical
                 algorithm requires pre-processing to setup the large
                 state in the first place and demands post-processing
                 after the statistical inference. This paper presents
                 General Iterative State Transition (GIST), a new
                 database operator for parallel iterative state
                 transitions over large states. GIST receives a state
                 constructed by a UDA, and then performs rounds of
                 transitions on the state until it converges. A final
                 UDA performs post-processing and result extraction. We
                 argue that the combination of UDA and GIST (UDA-GIST)
                 unifies data-parallel and state-parallel processing in
                 a single system, thus significantly extending the
                 analytical capabilities of DBMSes. We exemplify the
                 framework through two high-profile applications:
                 cross-document coreference and image denoising. We show
                 that the in-database framework allows us to tackle a 27
                 times larger problem than solved by the
                 state-of-the-art for the first application and achieves
                 43 times speedup over the state-of-the-art for the
                 second application.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yu:2015:EPP,
  author =       "Weiren Yu and Julie A. McCann",
  title =        "Efficient partial-pairs {SimRank} search on large
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "569--580",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The assessment of node-to-node similarities based on
                 graph topology arises in a myriad of applications,
                 e.g., web search. SimRank is a notable measure of this
                 type, with the intuition that ``two nodes are similar
                 if their in-neighbors are similar''. While most
                 existing work retrieving SimRank only considers
                 all-pairs SimRank $ s(*, *) $ and single-source SimRank
                 $ s(*, j) $ (scores between every node and query $j$),
                 there are appealing applications for partial-pairs
                 SimRank, e.g., similarity join. Given two node subsets
                 $A$ and $B$ in a graph, partial-pairs SimRank
                 assessment aims to retrieve only $ \{ s(a, b) \}_{
                  \forall a \in A, \forall b \in B}$. However,
                 the best-known solution appears not self-contained
                 since it hinges on the premise that the SimRank scores
                 with node-pairs in an $h$-go cover set must be given
                 beforehand. This paper focuses on efficient assessment
                 of partial-pairs SimRank in a self-contained manner.
                 (1) We devise a novel ``seed germination'' model that
                  computes partial-pairs SimRank in $ O(k | E | \min \{
                  | A |, | B | \})$ time and $ O(| E | + k | V |)$ memory
                 for $k$ iterations on a graph of $ | V |$ nodes and $ |
                 E |$ edges. (2) We further eliminate unnecessary edge
                 access to improve the time of partial-pairs SimRank to
                  $ O(m \min \{ | A |, | B | \})$, where $ m \geq \min \{
                  k | E |, \Delta^{2 k} \} $, and $ \Delta $ is the
                 maximum degree. (3) We show that our partial-pairs
                 SimRank model also can handle the computations of
                 all-pairs and single-source SimRanks. (4) We
                 empirically verify that our algorithms are (a) $ 38
                 \times $ faster than the best-known competitors, and
                 (b) memory-efficient, allowing scores to be assessed
                 accurately on graphs with tens of millions of links.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gatterbauer:2015:LSP,
  author =       "Wolfgang Gatterbauer and Stephan G{\"u}nnemann and
                 Danai Koutra and Christos Faloutsos",
  title =        "Linearized and single-pass belief propagation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "581--592",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "How can we tell when accounts are fake or real in a
                 social network? And how can we tell which accounts
                 belong to liberal, conservative or centrist users?
                 Often, we can answer such questions and label nodes in
                 a network based on the labels of their neighbors and
                 appropriate assumptions of homophily (``birds of a
                 feather flock together'') or heterophily (``opposites
                 attract''). One of the most widely used methods for
                 this kind of inference is Belief Propagation (BP) which
                 iteratively propagates the information from a few nodes
                 with explicit labels throughout a network until
                 convergence. A well-known problem with BP, however, is
                 that there are no known exact guarantees of convergence
                 in graphs with loops. This paper introduces Linearized
                 Belief Propagation (LinBP), a linearization of BP that
                 allows a closed-form solution via intuitive matrix
                 equations and, thus, comes with exact convergence
                 guarantees. It handles homophily, heterophily, and more
                 general cases that arise in multi-class settings. Plus,
                 it allows a compact implementation in SQL. The paper
                 also introduces Single-pass Belief Propagation (SBP), a
                 localized (or ``myopic'') version of LinBP that
                 propagates information across every edge at most once
                 and for which the final class assignments depend only
                 on the nearest labeled neighbors. In addition, SBP
                 allows fast incremental updates in dynamic networks.
                 Our runtime experiments show that LinBP and SBP are
                 orders of magnitude faster than standard BP, while
                 leading to almost identical node labels.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Do:2015:MRM,
  author =       "Loc Do and Hady W. Lauw and Ke Wang",
  title =        "Mining revenue-maximizing bundling configuration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "593--604",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With greater prevalence of social media, there is an
                 increasing amount of user-generated data revealing
                 consumer preferences for various products and services.
                 Businesses seek to harness this wealth of data to
                 improve their marketing strategies. Bundling, or
                 selling two or more items for one price is a
                 highly-practiced marketing strategy. In this paper, we
                 address the bundle configuration problem from the
                 data-driven perspective. Given a set of items in a
                 seller's inventory, we seek to determine which items
                 should belong to which bundle so as to maximize the
                 total revenue, by mining consumer preferences data. We
                 show that this problem is NP-hard when bundles are
                 allowed to contain more than two items. Therefore, we
                 describe an optimal solution for bundle sizes up to two
                 items, and propose two heuristic solutions for bundles
                 of any larger size. We investigate the effectiveness
                 and the efficiency of the proposed algorithms through
                 experimentations on real-life rating-based preferences
                 data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2015:RKN,
  author =       "Shiyu Yang and Muhammad Aamir Cheema and Xuemin Lin
                 and Wei Wang",
  title =        "Reverse $k$ nearest neighbors query processing:
                 experiments and analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "605--616",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a set of users, a set of facilities and a query
                 facility $q$, a reverse $k$ nearest neighbors (R $k$
                 NN) query returns every user $u$ for which the query is
                 one of its $k$ closest facilities. R $k$ NN queries
                 have been extensively studied under a variety of
                 settings and many sophisticated algorithms have been
                 proposed to answer these queries. However, the existing
                 experimental studies suffer from a few limitations. For
                 example, some studies estimate the I/O cost by charging
                 a fixed penalty per I/O and we show that this may be
                 misleading. Also, the existing studies either use an
                 extremely small buffer or no buffer at all which puts
                 some algorithms at serious disadvantage. We show that
                 the performance of these algorithms is significantly
                 improved even when a small buffer (containing 100
                 pages) is used. Finally, in each of the existing
                 studies, the proposed algorithm is mainly compared only
                 with its predecessor assuming that it was the best
                 algorithm at the time which is not necessarily true as
                 shown in our experimental study. Motivated by these
                 limitations, we present a comprehensive experimental
                 study that addresses these limitations and compares
                 some of the most notable algorithms under a wide
                 variety of settings. Furthermore, we also present a
                 carefully developed filtering strategy that
                 significantly improves TPL which is one of the most
                 popular R $k$ NN algorithms. Specifically, the
                 optimized version is up to 20 times faster than the
                 original version and reduces its I/O cost up to two
                 times.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2015:EVR,
  author =       "Xuguang Ren and Junhu Wang",
  title =        "Exploiting vertex relationships in speeding up
                 subgraph isomorphism over large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "617--628",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Subgraph Isomorphism is a fundamental problem in graph
                 data processing. Most existing subgraph isomorphism
                 algorithms are based on a backtracking framework which
                 computes the solutions by incrementally matching all
                 query vertices to candidate data vertices. However, we
                 observe that extensive duplicate computation exists in
                 these algorithms, and such duplicate computation can be
                 avoided by exploiting relationships between data
                 vertices. Motivated by this, we propose a novel
                 approach, BoostIso, to reduce duplicate computation.
                 Our extensive experiments with real datasets show that,
                 after integrating our approach, most existing subgraph
                 isomorphism algorithms can be speeded up significantly,
                 especially for some graphs with intensive vertex
                 relationships, where the improvement can be up to
                 several orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gatterbauer:2015:ALI,
  author =       "Wolfgang Gatterbauer and Dan Suciu",
  title =        "Approximate lifted inference with probabilistic
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "629--640",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper proposes a new approach for approximate
                 evaluation of \#P-hard queries with probabilistic
                 databases. In our approach, every query is evaluated
                 entirely in the database engine by evaluating a fixed
                 number of query plans, each providing an upper bound on
                 the true probability, then taking their minimum. We
                 provide an algorithm that takes into account important
                 schema information to enumerate only the minimal
                 necessary plans among all possible plans. Importantly,
                 this algorithm is a strict generalization of all known
                 results of PTIME self-join-free conjunctive queries: A
                 query is safe if and only if our algorithm returns one
                 single plan. We also apply three relational query
                 optimization techniques to evaluate all minimal safe
                 plans very fast. We give a detailed experimental
                 evaluation of our approach and, in the process, provide
                 a new way of thinking about the value of probabilistic
                 methods over non-probabilistic methods for ranking
                 query answers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vesdapunt:2015:ECA,
  author =       "Norases Vesdapunt and Kedar Bellare and Nilesh Dalvi",
  title =        "Errata for {``Crowdsourcing algorithms for entity
                 resolution''}: {(PVLDB {\bf 7}(12): 1071--1082)}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "5",
  pages =        "641--641",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Feb 9 18:24:35 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We discovered that there was a duplicate figure in our
                 paper. We accidentally put Figure 13(b) for Figure
                 12(b). We have provided the correct Figure 12(b) above
                 (See Figure 1). Figure 1 plots the recall of various
                 strategies as a function of the number of questions
                 asked for Places dataset. There was no error in the
                 discussion in our paper (See Section 6.2.1 in our paper
                 for more details).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jha:2015:IMM,
  author =       "Saurabh Jha and Bingsheng He and Mian Lu and Xuntao
                 Cheng and Huynh Phung Huynh",
  title =        "Improving main memory hash joins on {Intel Xeon Phi}
                 processors: an experimental approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "6",
  pages =        "642--653",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 10 17:42:37 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern processor technologies have driven new designs
                 and implementations in main-memory hash joins.
                 Recently, Intel Many Integrated Core (MIC)
                 co-processors (commonly known as Xeon Phi) embrace
                 emerging x86 single-chip many-core techniques. Compared
                 with contemporary multi-core CPUs, Xeon Phi has quite
                 different architectural features: wider SIMD
                 instructions, many cores and hardware contexts, as well
                 as lower-frequency in-order cores. In this paper, we
                 experimentally revisit the state-of-the-art hash join
                 algorithms on Xeon Phi co-processors. In particular, we
                 study two camps of hash join algorithms:
                 hardware-conscious ones that advocate careful tailoring
                 of the join algorithms to underlying hardware
                 architectures and hardware-oblivious ones that omit
                 such careful tailoring. For each camp, we study the
                 impact of architectural features and software
                 optimizations on Xeon Phi in comparison with results on
                 multi-core CPUs. Our experiments show two major
                 findings on Xeon Phi, which are quantitatively
                 different from those on multi-core CPUs. First, the
                 impact of architectural features and software
                 optimizations has quite different behavior on Xeon Phi
                 in comparison with those on the CPU, which calls for
                 new optimization and tuning on Xeon Phi. Second,
                 hardware oblivious algorithms can outperform hardware
                 conscious algorithms on a wide parameter window. These
                 two findings further shed light on the design and
                 implementation of query processing on new-generation
                 single-chip many-core technologies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hammoud:2015:DDR,
  author =       "Mohammad Hammoud and Dania Abed Rabbou and Reza Nouri
                 and Seyed-Mehdi-Reza Beheshti and Sherif Sakr",
  title =        "{DREAM}: distributed {RDF} engine with adaptive query
                 planner and minimal communication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "6",
  pages =        "654--665",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 10 17:42:37 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Resource Description Framework (RDF) and SPARQL
                 query language are gaining wide popularity and
                 acceptance. In this paper, we present DREAM, a
                 distributed and adaptive RDF system. As opposed to
                 existing RDF systems, DREAM avoids partitioning RDF
                 datasets and partitions only SPARQL queries. By not
                 partitioning datasets, DREAM offers a general paradigm
                 for different types of pattern matching queries, and
                 entirely averts intermediate data shuffling (only
                 auxiliary data are shuffled). Besides, by partitioning
                 queries, DREAM presents an adaptive scheme, which
                 automatically runs queries on various numbers of
                 machines depending on their complexities. Hence, in
                 essence DREAM combines the advantages of the
                 state-of-the-art centralized and distributed RDF
                 systems, whereby data communication is avoided and
                 cluster resources are aggregated. Likewise, it
                 precludes their disadvantages, wherein system resources
                 are limited and communication overhead is typically
                 hindering. DREAM achieves all its goals via employing a
                 novel graph-based, rule-oriented query planner and a
                 new cost model. We implemented DREAM and conducted
                 comprehensive experiments on a private cluster and on
                 the Amazon EC2 platform. Results show that DREAM can
                 significantly outperform three related popular RDF
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2015:OTA,
  author =       "Shuo Chen and Ju Fan and Guoliang Li and Jianhua Feng
                 and Kian-lee Tan and Jinhui Tang",
  title =        "Online topic-aware influence maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "6",
  pages =        "666--677",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 10 17:42:37 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Influence maximization, whose objective is to select
                 $k$ users (called seeds) from a social network such
                 that the number of users influenced by the seeds
                 (called influence spread) is maximized, has attracted
                 significant attention due to its widespread
                 applications, such as viral marketing and rumor
                 control. However, in real-world social networks, users
                 have their own interests (which can be represented as
                 topics) and are more likely to be influenced by their
                 friends (or friends' friends) with similar topics. We
                 can increase the influence spread by taking into
                 consideration topics. To address this problem, we study
                 topic-aware influence maximization, which, given a
                 topic-aware influence maximization (TIM) query, finds
                 $k$ seeds from a social network such that the
                 topic-aware influence spread of the $k$ seeds is
                 maximized. Our goal is to enable online TIM queries.
                 Since the topic-aware influence maximization problem is
                 NP-hard, we focus on devising efficient algorithms to
                 achieve instant performance while keeping a high
                 influence spread. We utilize a maximum influence
                 arborescence (MIA) model to approximate the computation
                 of influence spread. To efficiently find $k$ seeds
                 under the MIA model, we first propose a best-effort
                 algorithm with $ 1 - 1 / e$ approximation ratio, which
                 estimates an upper bound of the topic-aware influence
                 of each user and utilizes the bound to prune large
                 numbers of users with small influence. We devise
                 effective techniques to estimate tighter upper bounds.
                 We then propose a faster topic-sample-based algorithm
                 with $ \epsilon \cdot (1 - 1 / e)$ approximation ratio
                 for any $ \epsilon \in (0, 1]$, which materializes the
                 influence spread of some topic-distribution samples and
                 utilizes the materialized information to avoid
                 computing the actual influence of users with small
                 influences. Experimental results show that our methods
                 significantly outperform baseline approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nazi:2015:WWF,
  author =       "Azade Nazi and Zhuojie Zhou and Saravanan
                 Thirumuruganathan and Nan Zhang and Gautam Das",
  title =        "Walk, not wait: faster sampling over online social
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "6",
  pages =        "678--689",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 10 17:42:37 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we introduce a novel, general purpose,
                 technique for faster sampling of nodes over an online
                 social network. Specifically, unlike traditional random
                 walks which wait for the convergence of sampling
                 distribution to a predetermined target distribution ---
                 a waiting process that incurs a high query cost --- we
                 develop WALK-ESTIMATE, which starts with a much shorter
                 random walk, and then proactively estimate the sampling
                 probability for the node taken before using
                 acceptance--rejection sampling to adjust the sampling
                 probability to the predetermined target distribution.
                 We present a novel backward random walk technique which
                 provides provably unbiased estimations for the sampling
                 probability, and demonstrate the superiority of
                 WALK-ESTIMATE over traditional random walks through
                 theoretical analysis and extensive experiments over
                 real world online social networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Benedikt:2015:QAP,
  author =       "Michael Benedikt and Julien Leblay and Efthymia
                 Tsamoura",
  title =        "Querying with access patterns and integrity
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "6",
  pages =        "690--701",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 10 17:42:37 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional query processing involves a search for
                 plans formed by applying algebraic operators on top of
                 primitives representing access to relations in the
                 input query. But many querying scenarios involve two
                 interacting issues that complicate the search. On the
                 one hand, the search space may be limited by access
                 restrictions associated with the interfaces to
                 datasources, which require certain parameters to be
                 given as inputs. On the other hand, the search space
                 may be extended through the presence of integrity
                 constraints that relate sources to each other, allowing
                 for plans that do not match the structure of the user
                 query. In this paper we present the first optimization
                 approach that attacks both these difficulties within a
                 single framework, presenting a system in which
                 classical cost-based join optimization is extended to
                 support both access-restrictions and constraints.
                 Instead of iteratively exploring subqueries of the
                 input query, our optimizer explores a space of proofs
                 that witness the answering of the query, where each
                 proof has a direct correspondence with a query plan.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tangwongsan:2015:GIS,
  author =       "Kanat Tangwongsan and Martin Hirzel and Scott
                 Schneider and Kun-Lung Wu",
  title =        "General incremental sliding-window aggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "702--713",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream processing is gaining importance as more data
                 becomes available in the form of continuous streams and
                 companies compete to promptly extract insights from
                 them. In such applications, sliding-window aggregation
                 is a central operator, and incremental aggregation
                 helps avoid the performance penalty of re-aggregating
                 from scratch for each window change. This paper
                 presents Reactive Aggregator (RA), a new framework for
                 incremental sliding-window aggregation. RA is general
                 in that it does not require aggregation functions to be
                 invertible or commutative, and it does not require
                 windows to be FIFO. We implemented RA as a drop-in
                 replacement for the Aggregate operator of a commercial
                 streaming engine. Given $m$ updates on a window of size
                 $n$, RA has an algorithmic complexity of $ O(m + m \log
                 (n / m))$, rivaling the best prior algorithms for any
                 $m$. Furthermore, RA's implementation minimizes
                 overheads from allocation and pointer traversals by
                 using a single flat array.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lei:2015:SER,
  author =       "Chuan Lei and Zhongfang Zhuang and Elke A.
                 Rundensteiner and Mohamed Eltabakh",
  title =        "Shared execution of recurring workloads in
                 {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "714--725",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increasing complexity of data-intensive
                 MapReduce workloads, Hadoop must often accommodate
                 hundreds or even thousands of recurring analytics
                 queries that periodically execute over frequently
                 updated datasets, e.g., latest stock transactions, new
                 log files, or recent news feeds. For many applications,
                 such recurring queries come with user-specified
                 service-level agreements (SLAs), commonly expressed as
                 the maximum allowed latency for producing results
                 before their merits decay. The recurring nature of
                 these emerging workloads combined with their SLA
                 constraints make it challenging to share and optimize
                 their execution. While some recent efforts on multi-job
                 optimization in MapReduce have emerged, they focus on
                 only sharing work among ad-hoc jobs on static datasets.
                 Unfortunately, these sharing techniques neither take
                 the recurring nature of the queries into account nor
                 guarantee the satisfaction of the SLA requirements. In
                 this work, we propose the first scalable multi-query
                 sharing engine tailored for recurring workloads in the
                 MapReduce infrastructure, called ``Helix''. Helix
                 deploys new sliced window-alignment techniques to
                 create sharing opportunities among recurring queries
                 without introducing additional I/O overheads or
                 unnecessary data scans. And then, Helix introduces a
                 cost/benefit model for creating a sharing plan among
                 the recurring queries, and a scheduling strategy for
                 executing them to maximize the SLA satisfaction. Our
                 experimental results over real-world datasets confirm
                 that Helix significantly outperforms the
                 state-of-the-art
                 techniques by an order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Narasayya:2015:SBP,
  author =       "Vivek Narasayya and Ishai Menache and Mohit Singh and
                 Feng Li and Manoj Syamala and Surajit Chaudhuri",
  title =        "Sharing buffer pool memory in multi-tenant relational
                 database-as-a-service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "726--737",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Relational database-as-a-service (DaaS) providers need
                 to rely on multi-tenancy and resource sharing among
                 tenants, since statically reserving resources for a
                 tenant is not cost effective. A major consequence of
                 resource sharing is that the performance of one tenant
                 can be adversely affected by resource demands of other
                 co-located tenants. One such resource that is essential
                 for good performance of a tenant's workload is buffer
                 pool memory. In this paper, we study the problem of how
                 to effectively share buffer pool memory in multi-tenant
                 relational DaaS. We first develop an SLA framework that
                 defines and enforces accountability of the service
                 provider to the tenant even when buffer pool memory is
                 not statically reserved on behalf of the tenant. Next,
                 we present a novel buffer pool page replacement
                 algorithm (MT-LRU) that builds upon theoretical
                 concepts from weighted online caching, and is designed
                 for multi-tenant scenarios involving SLAs and
                 overbooking. MT-LRU generalizes the LRU-K algorithm
                 which is commonly used in relational database systems.
                 We have prototyped our techniques inside a commercial
                 DaaS engine and extensive experiments demonstrate the
                 effectiveness of our solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gao:2015:AWQ,
  author =       "Yunjun Gao and Qing Liu and Gang Chen and Baihua Zheng
                 and Linlin Zhou",
  title =        "Answering why-not questions on reverse top-$k$
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "738--749",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Why-not questions, which aim to seek clarifications on
                 the missing tuples for query results, have recently
                 received considerable attention from the database
                 community. In this paper, we systematically explore
                 why-not questions on reverse top-$k$ queries, owing to
                 its importance in multi-criteria decision making. Given
                 an initial reverse top-$k$ query and a missing/why-not
                 weighting vector set W$_m$ that is absent from the
                 query result, why-not questions on reverse top-$k$
                 queries explain why W$_m$ does not appear in the query
                 result and provide suggestions on how to refine the
                 initial query with minimum penalty to include W$_m$ in
                 the refined query result. We first formalize why-not
                 questions on reverse top-$k$ queries and reveal their
                 semantics, and then propose a unified framework called
                 WQRTQ to answer why-not questions on both monochromatic
                 and bichromatic reverse top-$k$ queries. Our framework
                 offers three solutions, namely, (i) modifying a query
                 point $q$, (ii) modifying a why-not weighting vector
                 set W$_m$ and a parameter $k$, and (iii) modifying $q$,
                 W$_m$, and $k$ simultaneously, to cater for different
                 application scenarios. Extensive experimental
                 evaluation using both real and synthetic data sets
                 verifies the effectiveness and efficiency of the
                 presented algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papadopoulos:2015:PAP,
  author =       "Dimitrios Papadopoulos and Charalampos Papamanthou and
                 Roberto Tamassia and Nikos Triandopoulos",
  title =        "Practical authenticated pattern matching with optimal
                 proof size",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "750--761",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We address the problem of authenticating pattern
                 matching queries over textual data that is outsourced
                 to an untrusted cloud server. By employing
                 cryptographic accumulators in a novel optimal
                 integrity-checking tool built directly over a suffix
                 tree, we design the first authenticated data structure
                 for verifiable answers to pattern matching queries
                 featuring fast generation of constant-size proofs. We
                 present two main applications of our new construction
                 to authenticate: (i) pattern matching queries over text
                 documents, and (ii) exact path queries over XML
                 documents. Answers to queries are verified by proofs of
                 size at most 500 bytes for text pattern matching, and
                 at most 243 bytes for exact path XML search,
                 independently of the document or answer size. By
                 design, our authentication schemes can also be
                 parallelized to offer extra efficiency during data
                 outsourcing. We provide a detailed experimental
                 evaluation of our schemes showing that for both
                 applications the times required to compute and verify a
                 proof are very small --- e.g., it takes less than $ 10
                 \mu $ s to generate a proof for a pattern (mis)match of
                 $ 10^2 $ characters in a text of $ 10^6 $ characters,
                 once the query has been evaluated.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Loghin:2015:PSB,
  author =       "Dumitrel Loghin and Bogdan Marius Tudor and Hao Zhang
                 and Beng Chin Ooi and Yong Meng Teo",
  title =        "A performance study of big data on small nodes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "762--773",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The continuous increase in volume, variety and
                 velocity of Big Data exposes datacenter resource
                 scaling to an energy utilization problem.
                 Traditionally, datacenters employ x86-64 (big) server
                 nodes with power usage of tens to hundreds of Watts.
                 But lately, low-power (small) systems originally
                 developed for mobile devices have seen significant
                 improvements in performance. These improvements could
                 lead to the adoption of such small systems in servers,
                 as announced by major industry players. In this
                 context, we systematically conduct a performance study
                 of Big Data execution on small nodes in comparison with
                 traditional big nodes, and present insights that would
                 be useful for future development. We run Hadoop
                 MapReduce, MySQL and in-memory Shark workloads on
                 clusters of ARM big.LITTLE boards and Intel Xeon
                 server systems. We evaluate execution time, energy
                 usage and total cost of running the workloads on
                 self-hosted ARM and Xeon nodes. Our study shows that
                 there is no one size fits all rule for judging the
                 efficiency of executing Big Data workloads on small and
                 big nodes. But small memory size, low memory and I/O
                 bandwidths, and software immaturity concur in canceling
                 the lower-power advantage of ARM servers. We show that
                 I/O-intensive MapReduce workloads are more
                 energy-efficient to run on Xeon nodes. In contrast,
                 database query processing is always more
                 energy-efficient on ARM servers, at the cost of
                 slightly lower throughput. With minor software
                 modifications, CPU-intensive MapReduce workloads are
                 almost four times cheaper to execute on ARM servers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papenbrock:2015:DCB,
  author =       "Thorsten Papenbrock and Sebastian Kruse and
                 Jorge-Arnulfo Quian{\'e}-Ruiz and Felix Naumann",
  title =        "Divide \& conquer-based inclusion dependency
                 discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "774--785",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The discovery of all inclusion dependencies (INDs) in
                 a dataset is an important part of any data profiling
                 effort. Apart from the detection of foreign key
                 relationships, INDs can help to perform data
                 integration, query optimization, integrity checking, or
                 schema (re-)design. However, the detection of INDs gets
                 harder as datasets become larger in terms of number of
                 tuples as well as attributes. To this end, we propose
                 Binder, an IND detection system that is capable of
                 detecting both unary and $n$-ary INDs. It is based on a
                 divide \& conquer approach, which allows to handle very
                 large datasets --- an important property on the face of
                 the ever increasing size of today's data. In contrast
                 to most related works, we do not rely on existing
                 database functionality nor assume that inspected
                 datasets fit into main memory. This renders Binder an
                 efficient and scalable competitor. Our exhaustive
                 experimental evaluation shows the high superiority of
                 Binder over the state-of-the-art in both unary (Spider)
                 and $n$-ary (Mind) IND discovery. Binder is up to $ 26
                 \times $ faster than Spider and more than $ 2500 \times
                 $ faster than Mind.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2015:PBT,
  author =       "Shimin Chen and Qin Jin",
  title =        "Persistent {B+}-trees in non-volatile main memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "786--797",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Computer systems in the near future are expected to
                 have Non-Volatile Main Memory (NVMM), enabled by a new
                 generation of Non-Volatile Memory (NVM) technologies,
                 such as Phase Change Memory (PCM), STT-MRAM, and
                 Memristor. The non-volatility property has the promise
                 to persist in-memory data structures for instantaneous
                 failure recovery. However, realizing such promise
                 requires a careful design to ensure that in-memory data
                 structures are in known consistent states after
                 failures. This paper studies persistent in-memory $
                 B^+$-Trees as $ B^+$-Trees are widely used in database
                 and data-intensive systems. While traditional
                 techniques, such as undo-redo logging and shadowing,
                 support persistent $ B^+$-Trees, we find that they
                 incur drastic performance overhead because of extensive
                 NVM writes and CPU cache flush operations. PCM-friendly
                 $ B^+$-Trees with unsorted leaf nodes help mediate this
                 issue, but the remaining overhead is still large. In
                 this paper, we propose write atomic $ B^+$-Trees (w$
                 B^+$-Trees), a new type of main-memory $ B^+$-Trees,
                 that aim to reduce such overhead as much as possible. $
                 w B^+$-Tree nodes employ a small indirect slot array
                 and/or a bitmap so that most insertions and deletions
                 do not require the movement of index entries. In this
                 way, $ w B^+$-Trees can achieve node consistency either
                 through atomic writes in the nodes or by redo-only
                 logging. We model fast NVM using DRAM on a real machine
                 and model PCM using a cycle-accurate simulator.
                 Experimental results show that compared with previous
                 persistent $ B^+$-Tree solutions, $ w B^+$-Trees
                 achieve up to $ 8.8 \times $ speedups on DRAM-like fast
                 NVM and up to $ 27.1 \times $ speedups on PCM for
                 insertions and deletions while maintaining good search
                 performance. Moreover, we replaced Memcached's internal
                 hash index with tree indices. Our real machine
                 Memcached experiments show that $ w B^+$-Trees achieve
                 up to $ 3.8 \times $ improvements over previous persistent tree
                 structures with undo-redo logging or shadowing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2015:RLC,
  author =       "Yubao Wu and Ruoming Jin and Jing Li and Xiang Zhang",
  title =        "Robust local community detection: on free rider effect
                 and its elimination",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "798--809",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a large network, local community detection aims
                 at finding the community that contains a set of query
                 nodes and also maximizes (minimizes) a goodness metric.
                 This problem has recently drawn intense research
                 interest. Various goodness metrics have been proposed.
                 However, most existing metrics tend to include
                 irrelevant subgraphs in the detected local community.
                 We refer to such irrelevant subgraphs as free riders.
                 We systematically study the existing goodness metrics
                 and provide theoretical explanations on why they may
                 cause the free rider effect. We further develop a query
                 biased node weighting scheme to reduce the free rider
                 effect. In particular, each node is weighted by its
                 proximity to the query node. We define a query biased
                 density metric to integrate the edge and node weights.
                 The query biased densest subgraph, which has the
                 largest query biased density, will shift to the
                 neighborhood of the query nodes after node weighting.
                 We then formulate the query biased densest connected
                 subgraph (QDC) problem, study its complexity, and
                 provide efficient algorithms to solve it. We perform
                 extensive experiments on a variety of real and
                 synthetic networks to evaluate the effectiveness and
                 efficiency of the proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2015:UCC,
  author =       "Hua Fan and Aditya Ramaraju and Marlon McKenzie and
                 Wojciech Golab and Bernard Wong",
  title =        "Understanding the causes of consistency anomalies in
                 {Apache Cassandra}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "810--813",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A recent paper on benchmarking eventual consistency
                 showed that when a constant workload is applied against
                 Cassandra, the staleness of values returned by read
                 operations exhibits interesting but unexplained
                 variations when plotted against time. In this paper we
                 reproduce this phenomenon and investigate in greater
                 depth the low-level mechanisms that give rise to stale
                 reads. We show that the staleness spikes exhibited by
                 Cassandra are strongly correlated with garbage
                 collection, particularly the ``stop-the-world'' phase
                 which pauses all application threads in a Java virtual
                 machine. We show experimentally that the staleness
                 spikes can be virtually eliminated by delaying read
                 operations artificially at servers immediately after a
                 garbage collection pause. In our experiments this
                 yields more than a 98\% reduction in the number of
                 consistency anomalies that exceed 5ms, and has
                 negligible impact on throughput and latency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aslay:2015:VMM,
  author =       "Cigdem Aslay and Wei Lu and Francesco Bonchi and Amit
                 Goyal and Laks V. S. Lakshmanan",
  title =        "Viral marketing meets social advertising: ad
                 allocation with minimum regret",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "7",
  pages =        "814--825",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:04:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Social advertisement is one of the fastest growing
                 sectors in the digital advertisement landscape: ads in
                 the form of promoted posts are shown in the feed of
                 users of a social networking platform, along with
                 normal social posts; if a user clicks on a promoted
                 post, the host (social network owner) is paid a fixed
                 amount from the advertiser. In this context, allocating
                 ads to users is typically performed by maximizing
                 click-through-rate, i.e., the likelihood that the user
                 will click on the ad. However, this simple strategy
                 fails to leverage the fact that ads can propagate
                 virally through the network, from endorsing users to
                 their followers. In this paper, we study the problem of
                 allocating ads to users through the viral-marketing
                 lenses. We show that allocation that takes into account
                 the propensity of ads for viral propagation can achieve
                 significantly better performance. However, uncontrolled
                 virality could be undesirable for the host as it
                 creates room for exploitation by the advertisers:
                 hoping to tap uncontrolled virality, an advertiser
                 might declare a lower budget for its marketing
                 campaign, aiming at the same large outcome with a
                 smaller cost. This creates a challenging trade-off: on
                 the one hand, the host aims at leveraging virality and
                 the network effect to improve advertising efficacy,
                 while on the other hand the host wants to avoid giving
                 away free service due to uncontrolled virality. We
                 formalize this as the problem of ad allocation with
                 minimum regret, which we show is NP-hard and
                 inapproximable w.r.t. any factor. However, we devise an
                 algorithm that provides approximation guarantees w.r.t.
                 the total budget of all advertisers. We develop a
                 scalable version of our approximation algorithm, which
                 we extensively test on four real-world data sets,
                 confirming that our algorithm delivers high quality
                 solutions, is scalable, and significantly outperforms
                 several natural baselines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chu:2015:ASD,
  author =       "Lingyang Chu and Shuhui Wang and Siyuan Liu and
                 Qingming Huang and Jian Pei",
  title =        "{ALID}: scalable dominant cluster detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "8",
  pages =        "826--837",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:02:29 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Detecting dominant clusters is important in many
                 analytic applications. The state-of-the-art methods
                 find dense subgraphs on the affinity graph as dominant
                 clusters. However, the time and space complexities of
                 those methods are dominated by the construction of
                 affinity graph, which is quadratic with respect to the
                 number of data points, and thus are impractical on
                 large data sets. To tackle the challenge, in this
                 paper, we apply Evolutionary Game Theory (EGT) and
                 develop a scalable algorithm, Approximate Localized
                 Infection Immunization Dynamics (ALID). The major idea
                 is to perform Localized Infection Immunization Dynamics
                 (LID) to find dense subgraphs within local ranges of
                 the affinity graph. LID is further scaled up with
                 guaranteed high efficiency and detection quality by an
                 estimated Region of Interest (ROI) and a Candidate
                 Infective Vertex Search method (CIVS). ALID only
                 constructs small local affinity graphs and has time
                 complexity $ O(C(a^* + \delta) n) $ and space
                 complexity $ O(a^*(a^* + \delta)) $, where $ a^* $ is
                 the size of the largest dominant cluster, and $ C \ll n
                 $ and $ \delta \ll n $ are small constants. We
                 demonstrate by extensive experiments on both synthetic
                 data and real world data that ALID achieves the
                 state-of-the-art detection quality with much lower time
                 and space cost on single machine. We also demonstrate
                 the encouraging parallelization performance of ALID by
                 implementing the Parallel ALID (PALID) on Apache Spark.
                 PALID processes 50 million SIFT data points in 2.29
                 hours, achieving a speedup ratio of 7.51 with 8
                 executors.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shao:2015:ESS,
  author =       "Yingxia Shao and Bin Cui and Lei Chen and Mingming Liu
                 and Xing Xie",
  title =        "An efficient similarity search framework for {SimRank}
                 over large dynamic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "8",
  pages =        "838--849",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:02:29 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SimRank is an important measure of vertex-pair
                 similarity according to the structure of graphs. The
                 similarity search based on SimRank is an important
                 operation for identifying similar vertices in a graph
                 and has been employed in many data analysis
                 applications. Nowadays, graphs in the real world become
                 much larger and more dynamic. The existing solutions
                 for similarity search are expensive in terms of time
                 and space cost. None of them can efficiently support
                 similarity search over large dynamic graphs. In this
                 paper, we propose a novel two-stage random-walk
                 sampling framework (TSF) for SimRank-based similarity
                 search (e.g., top-$k$ search). In the preprocessing
                 stage, TSF samples a set of one-way graphs to index raw
                 random walks in a novel manner within $ O(N R_g)$ time
                 and space, where $N$ is the number of vertices and $
                 R_g$ is the number of one-way graphs. The one-way graph
                 can be efficiently updated in accordance with the graph
                 modification, thus TSF is well suited to dynamic
                 graphs. During the query stage, TSF can search similar
                 vertices fast by naturally pruning unqualified vertices
                 based on the connectivity of one-way graphs.
                 Furthermore, with additional $ R_q$ samples, TSF can
                 estimate the SimRank score with probability [EQUATION]
                 if the error of approximation is bounded by $ 1 -
                 \epsilon $. Finally, to guarantee the scalability of
                 TSF, the one-way graphs can also be compactly stored on
                 the disk when the memory is limited. Extensive
                 experiments have demonstrated that TSF can handle
                 dynamic billion-edge graphs with high performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ahmad:2015:CMD,
  author =       "Muhammad Yousuf Ahmad and Bettina Kemme",
  title =        "Compaction management in distributed key--value
                 datastores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "8",
  pages =        "850--861",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:02:29 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Compactions are a vital maintenance mechanism used by
                 datastores based on the log-structured merge-tree to
                 counter the continuous buildup of data files under
                 update-intensive workloads. While compactions help keep
                 read latencies in check over the long run, this comes
                 at the cost of significantly degraded read performance
                 over the course of the compaction itself. In this
                 paper, we offer an in-depth analysis of
                 compaction-related performance overheads and propose
                 techniques for their mitigation. We offload large,
                 expensive compactions to a dedicated compaction server
                 to allow the datastore server to better utilize its
                 resources towards serving the actual workload.
                 Moreover, since the newly compacted data is already
                 cached in the compaction server's main memory, we fetch
                 this data over the network directly into the datastore
                 server's local cache, thereby avoiding the performance
                 penalty of reading it back from the filesystem. In
                 fact, pre-fetching the compacted data from the remote
                 cache prior to switching the workload over to it can
                 eliminate local cache misses altogether. Therefore, we
                 implement a smarter warmup algorithm that ensures that
                 all incoming read requests are served from the
                 datastore server's local cache even as it is warming
                 up. We have integrated our solution into HBase, and
                 using the YCSB and TPC-C benchmarks, we show that our
                 approach significantly mitigates compaction-related
                 performance problems. We also demonstrate the
                 scalability of our solution by distributing compactions
                 across multiple compaction servers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guerraoui:2015:DPD,
  author =       "Rachid Guerraoui and Anne-Marie Kermarrec and Rhicheek
                 Patra and Mahsa Taziki",
  title =        "{D2P}: distance-based differential privacy in
                 recommenders",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "8",
  pages =        "862--873",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:02:29 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The upsurge in the number of web users over the last
                 two decades has resulted in a significant growth of
                 online information. This information growth calls for
                 recommenders that personalize the information proposed
                 to each individual user. Nevertheless, personalization
                 also opens major privacy concerns. This paper presents
                 D2P, a novel protocol that ensures a strong form of
                 differential privacy, which we call distance-based
                 differential privacy, and which is particularly well
                 suited to recommenders. D2P avoids revealing exact user
                 profiles by creating altered profiles where each item
                 is replaced with another one at some distance. We
                 evaluate D2P analytically and experimentally on
                 MovieLens and Jester datasets and compare it with other
                 private and non-private recommenders.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mitliagkas:2015:FFP,
  author =       "Ioannis Mitliagkas and Michael Borokhovich and
                 Alexandros G. Dimakis and Constantine Caramanis",
  title =        "{FrogWild!}: fast {PageRank} approximations on graph
                 engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "8",
  pages =        "874--885",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:02:29 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose FrogWild, a novel algorithm for fast
                 approximation of high PageRank vertices, geared towards
                 reducing network costs of running traditional PageRank
                 algorithms. Our algorithm can be seen as a quantized
                 version of power iteration that performs multiple
                 parallel random walks over a directed graph. One
                 important innovation is that we introduce a
                 modification to the GraphLab framework that only
                 partially synchronizes mirror vertices. This partial
                 synchronization vastly reduces the network traffic
                 generated by traditional PageRank algorithms, thus
                 greatly reducing the per-iteration cost of PageRank. On
                 the other hand, this partial synchronization also
                 creates dependencies between the random walks used to
                 estimate PageRank. Our main theoretical innovation is
                 the analysis of the correlations introduced by this
                 partial synchronization process and a bound
                 establishing that our approximation is close to the
                 true PageRank vector. We implement our algorithm in
                 GraphLab and compare it against the default PageRank
                 implementation. We show that our algorithm is very
                 fast, performing each iteration in less than one second
                 on the Twitter graph and can be up to $ 7 \times $
                 faster compared to the standard GraphLab PageRank
                 implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vattani:2015:OPC,
  author =       "Andrea Vattani and Flavio Chierichetti and Keegan
                 Lowenstein",
  title =        "Optimal probabilistic cache stampede prevention",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "8",
  pages =        "886--897",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Apr 15 19:02:29 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "When a frequently-accessed cache item expires,
                 multiple requests to that item can trigger a cache miss
                 and start regenerating that same item at the same time.
                 This phenomenon, known as cache stampede, severely
                 limits the performance of databases and web servers. A
                 natural countermeasure to this issue is to let the
                 processes that perform such requests to randomly ask
                 for a regeneration before the expiration time of the
                 item. In this paper we give optimal algorithms for
                 performing such probabilistic early expirations. Our
                 algorithms are theoretically optimal and have much
                 better performances than other solutions used in
                 real-world applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Potti:2015:DNP,
  author =       "Navneet Potti and Jignesh M. Patel",
  title =        "{DAQ}: a new paradigm for approximate query
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "9",
  pages =        "898--909",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2777598.2777599",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 15 17:15:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many modern applications deal with exponentially
                 increasing data volumes and aid business-critical
                 decisions in near real-time. Particularly in
                 exploratory data analysis, the focus is on interactive
                 querying and some degree of error in estimated results
                 is tolerable. A common response to this challenge is
                 approximate query processing, where the user is
                 presented with a quick confidence interval estimate
                 based on a sample of the data. In this work, we
                 highlight some of the problems that are associated with
                 this probabilistic approach when extended to more
                 complex queries, both in semantic interpretation and
                 the lack of a formal algebra. As an alternative, we
                 propose deterministic approximate querying (DAQ)
                 schemes, formalize a closed deterministic approximation
                 algebra, and outline some design principles for DAQ
                 schemes. We also illustrate the utility of this
                 approach with an example deterministic online
                 approximation scheme which uses a bitsliced index
                 representation and computes the most significant bits
                 of the result first. Our prototype scheme delivers
                 speedups over exact aggregation and predicate
                 evaluation, and outperforms sampling-based schemes for
                 extreme value aggregations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Anciaux:2015:SSE,
  author =       "Nicolas Anciaux and Saliha Lallali and Iulian Sandu
                 Popa and Philippe Pucheral",
  title =        "A scalable search engine for mass storage smart
                 objects",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "9",
  pages =        "910--921",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2777598.2777600",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 15 17:15:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents a new embedded search engine
                 designed for smart objects. Such devices are generally
                 equipped with extremely low RAM and large Flash storage
                 capacity. To tackle these conflicting hardware
                 constraints, conventional search engines privilege
                 either insertion or query scalability but cannot meet
                 both requirements at the same time. Moreover, very few
                 solutions support document deletions and updates in
                 this context. In this paper, we introduce three design
                 principles, namely Write-Once Partitioning, Linear
                 Pipelining and Background Linear Merging, and show how
                 they can be combined to produce an embedded search
                 engine reconciling high insert\slash delete\slash
                 update rate and query scalability. We have implemented
                 our search engine on a development board having a
                 hardware configuration representative for smart objects
                 and have conducted extensive experiments using two
                 representative datasets. The experimental results
                 demonstrate the scalability of the approach and its
                 superiority compared to state of the art methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2015:SMD,
  author =       "Lanjun Wang and Shuo Zhang and Juwei Shi and Limei
                 Jiao and Oktie Hassanzadeh and Jia Zou and Chen Wang",
  title =        "Schema management for document stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "9",
  pages =        "922--933",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2777598.2777601",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 15 17:15:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Document stores that provide the efficiency of a
                 schema-less interface are widely used by developers in
                 mobile and cloud applications. However, the simplicity
                 developers achieved controversially leads to complexity
                 for data management due to lack of a schema. In this
                 paper, we present a schema management framework for
                 document stores. This framework discovers and persists
                 schemas of JSON records in a repository, and also
                 supports queries and schema summarization. The major
                 technical challenge comes from varied structures of
                 records caused by the schema-less data model and schema
                 evolution. In the discovery phase, we apply a canonical
                 form based method and propose an algorithm based on
                 equivalent sub-trees to group equivalent schemas
                 efficiently. Together with the algorithm, we propose a
                 new data structure, eSiBu-Tree, to store schemas and
                 support queries. In order to present a single
                 summarized representation for heterogeneous schemas in
                 records, we introduce the concept of ``skeleton'', and
                 propose to use it as a relaxed form of the schema,
                 which captures a small set of core attributes. Finally,
                 extensive experiments based on real data sets
                 demonstrate the efficiency of our proposed schema
                 discovery algorithms, and practical use cases in
                 real-world data exploration and integration scenarios
                 are presented to illustrate the effectiveness of using
                 skeletons in these applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schuhknecht:2015:SDS,
  author =       "Felix Martin Schuhknecht and Pankaj Khanchandani and
                 Jens Dittrich",
  title =        "On the surprising difficulty of simple things: the
                 case of radix partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "9",
  pages =        "934--937",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2777598.2777602",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 15 17:15:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Partitioning a dataset into ranges is a task that is
                 common in various applications such as sorting
                 [1,6,7,8,9] and hashing [3] which are in turn building
                 blocks for almost any type of query processing.
                 Especially radix-based partitioning is very popular due
                 to its simplicity and high performance over
                 comparison-based versions [6].",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2015:KBT,
  author =       "Xin Luna Dong and Evgeniy Gabrilovich and Kevin Murphy
                 and Van Dang and Wilko Horn and Camillo Lugaresi and
                 Shaohua Sun and Wei Zhang",
  title =        "Knowledge-based trust: estimating the trustworthiness
                 of web sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "9",
  pages =        "938--949",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2777598.2777603",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 15 17:15:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The quality of web sources has been traditionally
                 evaluated using exogenous signals such as the hyperlink
                 structure of the graph. We propose a new approach that
                 relies on endogenous signals, namely, the correctness
                 of factual information provided by the source. A source
                 that has few false facts is considered to be
                 trustworthy. The facts are automatically extracted from
                 each source by information extraction methods commonly
                 used to construct knowledge bases. We propose a way to
                 distinguish errors made in the extraction process from
                 factual errors in the web source per se, by using joint
                 inference in a novel multi-layer probabilistic model.
                 We call the trustworthiness score we computed
                 Knowledge-Based Trust (KBT). On synthetic data, we show
                 that our method can reliably compute the true
                 trustworthiness levels of the sources. We then apply it
                 to a database of 2.8B facts extracted from the web, and
                 thereby estimate the trustworthiness of 119M webpages.
                 Manual evaluation of a subset of the results confirms
                 the effectiveness of the method.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2015:GUB,
  author =       "Minyang Han and Khuzaima Daudjee",
  title =        "{Giraph} unchained: barrierless asynchronous parallel
                 execution in {Pregel}-like graph processing systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "9",
  pages =        "950--961",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2777598.2777604",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 15 17:15:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The bulk synchronous parallel (BSP) model used by
                 synchronous graph processing systems allows algorithms
                 to be easily implemented and reasoned about. However,
                 BSP can suffer from poor performance due to stale
                 messages and frequent global synchronization barriers.
                 Asynchronous computation models have been proposed to
                 alleviate these overheads but existing asynchronous
                 systems that implement such models have limited
                 scalability or retain frequent global barriers, and do
                 not always support graph mutations or algorithms with
                 multiple computation phases. We propose barrierless
                 asynchronous parallel (BAP), a new computation model
                 that reduces both message staleness and global
                 synchronization. This enables BAP to overcome the
                 limitations of existing asynchronous models while
                 retaining support for graph mutations and algorithms
                 with multiple computation phases. We present GiraphUC,
                 which implements our BAP model in the open source
                 distributed graph processing system Giraph, and
                 evaluate our system at scale with large real-world
                 graphs on 64 EC2 machines. We show that GiraphUC
                 provides across-the-board performance improvements of
                 up to $ 5 \times $ faster over synchronous systems and
                 up to an order of magnitude faster than asynchronous
                 systems. Our results demonstrate that the BAP model
                 provides efficient and transparent asynchronous
                 execution of algorithms that are programmed
                 synchronously.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bogh:2015:WEP,
  author =       "Kenneth S. B{\o}gh and Sean Chester and Ira Assent",
  title =        "Work-efficient parallel skyline computation for the
                 {GPU}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "9",
  pages =        "962--973",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2777598.2777605",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri May 15 17:15:24 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The skyline operator returns records in a dataset that
                 provide optimal trade-offs of multiple dimensions.
                 State-of-the-art skyline computation involves complex
                 tree traversals, data-ordering, and conditional
                 branching to minimize the number of point-to-point
                 comparisons. Meanwhile, GPGPU computing offers the
                 potential for parallelizing skyline computation across
                 thousands of cores. However, attempts to port skyline
                 algorithms to the GPU have prioritized throughput and
                 failed to outperform sequential algorithms. In this
                 paper, we introduce a new skyline algorithm, designed
                 for the GPU, that uses a global, static partitioning
                 scheme. With the partitioning, we can permit controlled
                 branching to exploit transitive relationships and avoid
                 most point-to-point comparisons. The result is a
                 non-traditional GPU algorithm, SkyAlign, that
                 prioritizes work-efficiency and respectable throughput,
                 rather than maximal throughput, to achieve orders of
                 magnitude faster performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lai:2015:SSE,
  author =       "Longbin Lai and Lu Qin and Xuemin Lin and Lijun
                 Chang",
  title =        "Scalable subgraph enumeration in {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "974--985",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794368",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Subgraph enumeration, which aims to find all the
                 subgraphs of a large data graph that are isomorphic to
                 a given pattern graph, is a fundamental graph problem
                 with a wide range of applications. However, existing
                 sequential algorithms for subgraph enumeration fall
                 short in handling large graphs due to the involvement
                 of computationally intensive subgraph isomorphism
                 operations. Thus, some recent researches focus on
                 solving the problem using MapReduce. Nevertheless,
                 existing MapReduce approaches are not scalable to handle
                 very large graphs since they either produce a huge
                 number of partial results or consume a large amount of
                 memory. Motivated by this, in this paper, we propose a
                 new algorithm TwinTwigJoin based on a left-deep-join
                 framework in MapReduce, in which the basic join unit is
                 a TwinTwig (an edge or two incident edges of a node).
                 We show that in the Erd{\H{o}}s--R{\'e}nyi random-graph
                 model, TwinTwigJoin is instance optimal in the
                 left-deep-join framework under reasonable assumptions,
                 and we devise an algorithm to compute the optimal join
                 plan. Three optimization strategies are explored to
                 improve our algorithm. Furthermore, we discuss how our
                 approach can be adapted in the power-law random-graph
                 model. We conduct extensive performance studies in
                 several real graphs, one of which contains billions of
                 edges. Our approach significantly outperforms existing
                 solutions in all tests.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Finis:2015:IHD,
  author =       "Jan Finis and Robert Brunel and Alfons Kemper and
                 Thomas Neumann and Norman May and Franz Faerber",
  title =        "Indexing highly dynamic hierarchical data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "986--997",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794369",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Maintaining and querying hierarchical data in a
                 relational database system is an important task in many
                 business applications. This task is especially
                 challenging when considering dynamic use cases with a
                 high rate of complex, possibly skewed structural
                 updates. Labeling schemes are widely considered the
                 indexing technique of choice for hierarchical data, and
                 many different schemes have been proposed. However,
                 they cannot handle dynamic use cases well due to
                 various problems which we investigate in this paper. We
                 therefore propose our dynamic Order Indexes, which
                 offer competitive query performance, unprecedented
                 update efficiency, and robustness for highly dynamic
                 workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2015:CDS,
  author =       "Meng Wang and Chaokun Wang and Jeffrey Xu Yu and Jun
                 Zhang",
  title =        "Community detection in social networks: an in-depth
                 benchmarking study with a procedure-oriented
                 framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "998--1009",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794370",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Revealing the latent community structure, which is
                 crucial to understanding the features of networks, is
                 an important problem in network and graph analysis.
                 During the last decade, many approaches have been
                 proposed to solve this challenging problem in diverse
                 ways, i.e. different measures or data structures.
                 Unfortunately, experimental reports on existing
                 techniques fell short in validity and integrity since
                 many comparisons were not based on a unified code base
                 or merely discussed in theory. We engage in an in-depth
                 benchmarking study of community detection in social
                 networks. We formulate a generalized community
                 detection procedure and propose a procedure-oriented
                 framework for benchmarking. This framework enables us
                 to evaluate and compare various approaches to community
                 detection systematically and thoroughly under identical
                 experimental conditions. Upon that we can analyze and
                 diagnose the inherent defect of existing approaches
                 deeply, and further make effective improvements
                 correspondingly. We have re-implemented ten
                 state-of-the-art representative algorithms upon this
                 framework and make comprehensive evaluations of
                 multiple aspects, including the efficiency evaluation,
                 performance evaluations, sensitivity evaluations, etc.
                 We discuss their merits and faults in depth, and draw a
                 set of take-away interesting conclusions. In addition,
                 we present how we can make diagnoses for these
                 algorithms resulting in significant improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kazemi:2015:GGM,
  author =       "Ehsan Kazemi and S. Hamed Hassani and Matthias
                 Grossglauser",
  title =        "Growing a graph matching from a handful of seeds",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1010--1021",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794371",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In many graph-mining problems, two networks from
                 different domains have to be matched. In the absence of
                 reliable node attributes, graph matching has to rely on
                 only the link structures of the two networks, which
                 amounts to a generalization of the classic graph
                 isomorphism problem. Graph matching has applications in
                 social-network reconciliation and de-anonymization,
                 protein-network alignment in biology, and computer
                 vision. The most scalable graph-matching approaches
                 use ideas from percolation theory, where a matched node
                 pair ``infects'' neighbouring pairs as additional
                 potential matches. This class of matching algorithm
                 requires an initial seed set of known matches to start
                 the percolation. The size and correctness of the
                 matching is very sensitive to the size of the seed set.
                 In this paper, we give a new graph-matching algorithm
                 that can operate with a much smaller seed set than
                 previous approaches, with only a small increase in
                 matching errors. We characterize a phase transition in
                 matching performance as a function of the seed set
                 size, using a random bigraph model and ideas from
                 bootstrap percolation theory. We also show the
                 excellent performance in matching several real
                 large-scale social networks, using only a handful of
                 seeds.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cheng:2015:RDB,
  author =       "Peng Cheng and Xiang Lian and Zhao Chen and Rui Fu and
                 Lei Chen and Jinsong Han and Jizhong Zhao",
  title =        "Reliable diversity-based spatial crowdsourcing by
                 moving workers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1022--1033",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794372",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the rapid development of mobile devices and the
                 crowdsourcing platforms, the spatial crowdsourcing has
                 attracted much attention from the database community,
                 specifically, spatial crowdsourcing refers to sending a
                 location-based request to workers according to their
                 positions. In this paper, we consider an important
                 spatial crowdsourcing problem, namely reliable
                 diversity-based spatial crowdsourcing (RDB-SC), in
                 which spatial tasks (such as taking videos/photos of a
                 landmark or firework shows, and checking whether or not
                 parking spaces are available) are time-constrained, and
                 workers are moving towards some directions. Our RDB-SC
                 problem is to assign workers to spatial tasks such that
                 the completion reliability and the spatial/temporal
                 diversities of spatial tasks are maximized. We prove
                 that the RDB-SC problem is NP-hard and intractable.
                 Thus, we propose three effective approximation
                 approaches, including greedy, sampling, and
                 divide-and-conquer algorithms. In order to improve the
                 efficiency, we also design an effective
                 cost-model-based index, which can dynamically maintain
                 moving workers and spatial tasks with low cost, and
                 efficiently facilitate the retrieval of RDB-SC answers.
                 Through extensive experiments, we demonstrate the
                 efficiency and effectiveness of our proposed approaches
                 over both real and synthetic datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2015:LHF,
  author =       "Zhuojie Zhou and Nan Zhang and Gautam Das",
  title =        "Leveraging history for faster sampling of online
                 social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1034--1045",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794373",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With a vast amount of data available on online social
                 networks, how to enable efficient analytics over such
                 data has been an increasingly important research
                 problem. Given the sheer size of such social networks,
                 many existing studies resort to sampling techniques
                 that draw random nodes from an online social network
                 through its restrictive web/API interface. While these
                 studies differ widely in analytics tasks supported and
                 algorithmic design, almost all of them use the exact
                 same underlying technique of random walk --- a Markov
                 Chain Monte Carlo based method which iteratively
                 transits from one node to its random neighbor. Random
                 walk fits naturally with this problem because, for most
                 online social networks, the only query we can issue
                 through the interface is to retrieve the neighbors of a
                 given node (i.e., no access to the full graph
                 topology). A problem with random walks, however, is the
                 ``burn-in'' period which requires a large number of
                 transitions/queries before the sampling distribution
                 converges to a stationary value that enables the
                 drawing of samples in a statistically valid manner. In
                 this paper, we consider a novel problem of speeding up
                 the fundamental design of random walks (i.e., reducing
                 the number of queries it requires) without changing the
                 stationary distribution it achieves --- thereby
                 enabling a more efficient ``drop-in'' replacement for
                 existing sampling-based analytics techniques over
                 online social networks. Technically, our main idea is
                 to leverage the history of random walks to construct a
                 higher-ordered Markov chain. We develop two algorithms,
                 Circulated Neighbors and Groupby Neighbors Random Walk
                 (CNRW and GNRW) and rigidly prove that, no matter what
                 the social network topology is, CNRW and GNRW offer
                 better efficiency than baseline random walks while
                 achieving the same stationary distribution. We
                 demonstrate through extensive experiments on real-world
                 social networks and synthetic graphs the superiority of
                 our techniques over the existing ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2015:TFE,
  author =       "Yufei Ding and Xipeng Shen and Madanlal Musuvathi and
                 Todd Mytkowicz",
  title =        "{TOP}: a framework for enabling algorithmic
                 optimizations for distance-related problems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1046--1057",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794374",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Computing distances among data points is an essential
                 part of many important algorithms in data analytics,
                 graph analysis, and other domains. In each of these
                 domains, developers have spent significant manual
                 effort optimizing algorithms, often through novel
                  applications of the triangle inequality, in order to
                 minimize the number of distance computations in the
                 algorithms. In this work, we observe that many
                 algorithms across these domains can be generalized as
                 an instance of a generic distance-related abstraction.
                 Based on this abstraction, we derive seven principles
                 for correctly applying the triangular inequality to
                 optimize distance-related algorithms. Guided by the
                 findings, we develop {Triangular} {OPtimizer} (TOP),
                 the first software framework that is able to
                 automatically produce optimized algorithms that either
                 matches or outperforms manually designed algorithms for
                 solving distance-related problems. TOP achieves up to
                  237x speedups and 2.5x on average.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Leis:2015:EPW,
  author =       "Viktor Leis and Kan Kundhikanjana and Alfons Kemper
                 and Thomas Neumann",
  title =        "Efficient processing of window functions in analytical
                 {SQL} queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1058--1069",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794375",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Window functions, also known as analytic OLAP
                 functions, have been part of the SQL standard for more
                 than a decade and are now a widely-used feature. Window
                 functions allow to elegantly express many useful query
                 types including time series analysis, ranking,
                 percentiles, moving averages, and cumulative sums.
                 Formulating such queries in plain SQL-92 is usually
                 both cumbersome and inefficient. Despite being
                 supported by all major database systems, there have
                 been few publications that describe how to implement an
                 efficient relational window operator. This work aims at
                 filling this gap by presenting an efficient and general
                 algorithm for the window operator. Our algorithm is
                 optimized for high-performance main-memory database
                 systems and has excellent performance on modern
                 multi-core CPUs. We show how to fully parallelize all
                 phases of the operator in order to effectively scale
                 for arbitrary input distributions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2015:RTT,
  author =       "Yuchen Li and Dongxiang Zhang and Kian-Lee Tan",
  title =        "Real-time targeted influence maximization for online
                 advertisements",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1070--1081",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794376",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Advertising in social network has become a
                 multi-billion-dollar industry. A main challenge is to
                 identify key influencers who can effectively contribute
                 to the dissemination of information. Although the
                 influence maximization problem, which finds a seed set
                 of k most influential users based on certain
                 propagation models, has been well studied, it is not
                 target-aware and cannot be directly applied to online
                 advertising. In this paper, we propose a new problem,
                 named Keyword-Based Targeted Influence Maximization
                 (KB-TIM), to find a seed set that maximizes the
                 expected influence over users who are relevant to a
                 given advertisement. To solve the problem, we propose a
                 sampling technique based on weighted reverse influence
                  set and achieve an approximation ratio of $ (1 - 1 /
                  e - \epsilon) $. To meet the instant-speed
                 requirement, we propose two disk-based solutions that
                 improve the query processing time by two orders of
                 magnitude over the state-of-the-art solutions, while
                 keeping the theoretical bound. Experiments conducted on
                 two real social networks confirm our theoretical
                 findings as well as the efficiency. Given an
                 advertisement with 5 keywords, it takes only 2 seconds
                 to find the most influential users in a social network
                 with billions of edges.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papenbrock:2015:FDD,
  author =       "Thorsten Papenbrock and Jens Ehrlich and Jannik Marten
                 and Tommy Neubert and Jan-Peer Rudolph and Martin
                 Sch{\"o}nberg and Jakob Zwiener and Felix Naumann",
  title =        "Functional dependency discovery: an experimental
                 evaluation of seven algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1082--1093",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794377",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Functional dependencies are important metadata used
                 for schema normalization, data cleansing and many other
                 tasks. The efficient discovery of functional
                 dependencies in tables is a well-known challenge in
                 database research and has seen several approaches.
                 Because no comprehensive comparison between these
                 algorithms exist at the time, it is hard to choose the
                 best algorithm for a given dataset. In this
                 experimental paper, we describe, evaluate, and compare
                 the seven most cited and most important algorithms, all
                 solving this same problem. First, we classify the
                 algorithms into three different categories, explaining
                 their commonalities. We then describe all algorithms
                 with their main ideas. The descriptions provide
                 additional details where the original papers were
                 ambiguous or incomplete. Our evaluation of careful
                 re-implementations of all algorithms spans a broad test
                 space including synthetic and real-world data. We show
                 that all functional dependency algorithms optimize for
                 certain data characteristics and provide hints on when
                 to choose which algorithm. In summary, however, all
                 current approaches scale surprisingly poorly, showing
                 potential for future research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kalinin:2015:SEI,
  author =       "Alexander Kalinin and Ugur Cetintemel and Stan
                 Zdonik",
  title =        "{Searchlight}: enabling integrated search and
                 exploration over large multidimensional data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1094--1105",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794378",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a new system, called Searchlight, that
                 uniquely integrates constraint solving and data
                 management techniques. It allows Constraint Programming
                 (CP) machinery to run efficiently inside a DBMS without
                 the need to extract, transform and move the data. This
                 marriage concurrently offers the rich expressiveness
                 and efficiency of constraint-based search and
                 optimization provided by modern CP solvers, and the
                 ability of DBMSs to store and query data at scale,
                 resulting in an enriched functionality that can
                 effectively support both data- and search-intensive
                 applications. As such, Searchlight is the first system
                 to support generic search, exploration and mining over
                 large multi-dimensional data collections, going beyond
                 point algorithms designed for point search and mining
                 tasks. Searchlight makes the following scientific
                 contributions: o Constraint solvers as first-class
                 citizens Instead of treating solver logic as a
                 black-box, Searchlight provides native support,
                 incorporating the necessary APIs for its specification
                 and transparent execution as part of query plans, as
                 well as novel algorithms for its optimized execution
                 and parallelization. o Speculative solving Existing
                 solvers assume that the entire data set is main-memory
                 resident. Searchlight uses an innovative two stage
                 Solve-Validate approach that allows it to operate
                 speculatively yet safely on main-memory synopses,
                 quickly producing candidate search results that can
                 later be efficiently validated on real data. o
                 Computation and I/O load balancing As CP solver logic
                 can be computationally expensive, executing it on large
                 search and data spaces requires novel CPU-I/O balancing
                 approaches when performing search distribution. We
                 built a prototype implementation of Searchlight on
                 Google's Or-Tools, an open-source suite of operations
                 research tools, and the array DBMS SciDB. Extensive
                 experimental results show that Searchlight often
                 performs orders of magnitude faster than the next best
                 approach (SciDB-only or CP-solver-only) in terms of end
                 response time and time to first result.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rahman:2015:PID,
  author =       "Md Farhadur Rahman and Weimo Liu and Saravanan
                 Thirumuruganathan and Nan Zhang and Gautam Das",
  title =        "Privacy implications of database ranking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "10",
  pages =        "1106--1117",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2794367.2794379",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:06 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, there has been much research in the
                 adoption of Ranked Retrieval model (in addition to the
                 Boolean retrieval model) in structured databases,
                 especially those in a client-server environment (e.g.,
                 web databases). With this model, a search query returns
                 top- k tuples according to not just exact matches of
                 selection conditions, but a suitable ranking function.
                 While much research has gone into the design of ranking
                 functions and the efficient processing of top- k
                 queries, this paper studies a novel problem on the
                 privacy implications of database ranking. The
                 motivation is a novel yet serious privacy leakage we
                 found on real-world web databases which is caused by
                 the ranking function design. Many such databases
                 feature private attributes --- e.g., a social network
                 allows users to specify certain attributes as only
                 visible to him/herself, but not to others. While these
                 websites generally respect the privacy settings by not
                 directly displaying private attribute values in search
                 query answers, many of them nevertheless take into
                 account such private attributes in the ranking function
                 design. The conventional belief might be that tuple
                 ranks alone are not enough to reveal the private
                 attribute values. Our investigation, however, shows
                 that this is not the case in reality. To address the
                 problem, we introduce a taxonomy of the problem space
                 with two dimensions, (1) the type of query interface
                 and (2) the capability of adversaries. For each
                 subspace, we develop a novel technique which either
                 guarantees the successful inference of private
                 attributes, or does so for a significant portion of
                 real-world tuples. We demonstrate the effectiveness and
                 efficiency of our techniques through theoretical
                 analysis, extensive experiments over real-world
                 datasets, as well as successful online attacks over
                 websites with tens to hundreds of millions of users ---
                 e.g., Amazon Goodreads and Renren.com.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kohler:2015:PCS,
  author =       "Henning K{\"o}hler and Sebastian Link and Xiaofang
                 Zhou",
  title =        "Possible and certain {SQL} keys",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1118--1129",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809975",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Driven by the dominance of the relational model, the
                 requirements of modern applications, and the veracity
                 of data, we revisit the fundamental notion of a key in
                 relational databases with NULLs. In SQL database
                 systems primary key columns are NOT NULL by default.
                 NULL columns may occur in unique constraints which only
                 guarantee uniqueness for tuples which do not feature
                 null markers in any of the columns involved, and
                 therefore serve a different function than primary keys.
                 We investigate the notions of possible and certain
                 keys, which are keys that hold in some or all possible
                 worlds that can originate from an SQL table,
                 respectively. Possible keys coincide with the unique
                 constraint of SQL, and thus provide a semantics for
                 their syntactic definition in the SQL standard. Certain
                 keys extend primary keys to include NULL columns, and
                 thus form a sufficient and necessary condition to
                 identify tuples uniquely, while primary keys are only
                 sufficient for that purpose. In addition to basic
                 characterization, axiomatization, and simple discovery
                 approaches for possible and certain keys, we
                 investigate the existence and construction of Armstrong
                 tables, and describe an indexing scheme for enforcing
                 certain keys. Our experiments show that certain keys
                 with NULLs do occur in real-world databases, and that
                 related computational problems can be solved
                 efficiently. Certain keys are therefore semantically
                 well-founded and able to maintain data quality in the
                 form of Codd's entity integrity rule while handling the
                 requirements of modern applications, that is, higher
                 volumes of incomplete data from different formats.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tang:2015:SSJ,
  author =       "Yu Tang and Yilun Cai and Nikos Mamoulis",
  title =        "Scaling similarity joins over tree-structured data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1130--1141",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809976",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a large collection of tree-structured objects
                 (e.g., XML documents), the similarity join finds the
                 pairs of objects that are similar to each other, based
                 on a similarity threshold and a tree edit distance
                 measure. The state-of-the-art similarity join methods
                 compare simpler approximations of the objects (e.g.,
                 strings), in order to prune pairs that cannot be part
                 of the similarity join result based on distance bounds
                 derived by the approximations. In this paper, we
                 propose a novel similarity join approach, which is
                 based on the dynamic decomposition of the tree objects
                 into subgraphs, according to the similarity threshold.
                 Our technique avoids computing the exact distance
                 between two tree objects, if the objects do not share
                 at least one common subgraph. In order to scale up the
                 join, the computed subgraphs are managed in a two-layer
                 index. Our experimental results on real and synthetic
                 data collections show that our approach outperforms the
                 state-of-the-art methods by up to an order of
                 magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rahman:2015:WSE,
  author =       "Habibur Rahman and Saravanan Thirumuruganathan and
                 Senjuti Basu Roy and Sihem Amer-Yahia and Gautam Das",
  title =        "Worker skill estimation in team-based tasks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1142--1153",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809977",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many emerging applications such as collaborative
                 editing, multi-player games, or fan-subbing require to
                 form a team of experts to accomplish a task together.
                 Existing research has investigated how to assign
                 workers to such team-based tasks to ensure the best
                 outcome assuming the skills of individual workers to be
                 known. In this work, we investigate how to estimate
                 individual worker's skill based on the outcome of the
                 team-based tasks they have undertaken. We consider two
                 popular skill aggregation functions and estimate the
                 skill of the workers, where skill is either a
                 deterministic value or a probability distribution. We
                 propose efficient solutions for worker skill estimation
                 using continuous and discrete optimization techniques.
                 We present comprehensive experiments and validate the
                 scalability and effectiveness of our proposed solutions
                 using multiple real-world datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2015:DDP,
  author =       "Xi He and Graham Cormode and Ashwin Machanavajjhala
                 and Cecilia M. Procopiuc and Divesh Srivastava",
  title =        "{DPT}: differentially private trajectory synthesis
                 using hierarchical reference systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1154--1165",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809978",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "GPS-enabled devices are now ubiquitous, from airplanes
                 and cars to smartphones and wearable technology. This
                 has resulted in a wealth of data about the movements of
                 individuals and populations, which can be analyzed for
                 useful information to aid in city and traffic planning,
                 disaster preparedness and so on. However, the places
                 that people go can disclose extremely sensitive
                 information about them, and thus their use needs to be
                 filtered through privacy preserving mechanisms. This
                 turns out to be a highly challenging task: raw
                 trajectories are highly detailed, and typically no pair
                 is alike. Previous attempts fail either to provide
                 adequate privacy protection, or to remain sufficiently
                 faithful to the original behavior. This paper presents
                 DPT, a system to synthesize mobility data based on raw
                 GPS trajectories of individuals while ensuring strong
                 privacy protection in the form of $ \epsilon
                 $-differential privacy. DPT makes a number of novel
                 modeling and algorithmic contributions including (i)
                 discretization of raw trajectories using hierarchical
                 reference systems (at multiple resolutions) to capture
                 individual movements at differing speeds, (ii) adaptive
                 mechanisms to select a small set of reference systems
                 and construct prefix tree counts privately, and (iii)
                 use of direction-weighted sampling for improved
                 utility. While there have been prior attempts to solve
                 the subproblems required to generate synthetic
                 trajectories, to the best of our knowledge, ours is the
                 first system that provides an end-to-end solution. We
                 show the efficacy of our synthetic trajectory
                 generation system using an extensive empirical
                 evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2015:SSA,
  author =       "Boduo Li and Yanlei Diao and Prashant Shenoy",
  title =        "Supporting scalable analytics with latency
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1166--1177",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809979",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently there has been a significant interest in
                 building big data analytics systems that can handle
                 both ``big data'' and ``fast data''. Our work is
                 strongly motivated by recent real-world use cases that
                 point to the need for a general, unified data
                 processing framework to support analytical queries with
                 different latency requirements. Toward this goal, we
                 start with an analysis of existing big data systems to
                 understand the causes of high latency. We then propose
                 an extended architecture with mini-batches as
                 granularity for computation and shuffling, and augment
                 it with new model-driven resource allocation and
                 runtime scheduling techniques to meet user latency
                 requirements while maximizing throughput. Results from
                 real-world workloads show that our techniques,
                 implemented in Incremental Hadoop, reduce its latency
                 from tens of seconds to sub-second, with 2x-5x increase
                 in throughput. Our system also outperforms
                 state-of-the-art distributed stream systems, Storm and
                 Spark Streaming, by 1-2 orders of magnitude when
                 combining latency and throughput.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shiokawa:2015:SEA,
  author =       "Hiroaki Shiokawa and Yasuhiro Fujiwara and Makoto
                 Onizuka",
  title =        "{SCAN++}: efficient algorithm for finding clusters,
                 hubs and outliers on large-scale graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1178--1189",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809980",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph clustering is one of the key techniques for
                 understanding the structures present in graphs. Besides
                 cluster detection, identifying hubs and outliers is
                 also a key task, since they have important roles to
                 play in graph data mining. The structural clustering
                 algorithm SCAN, proposed by Xu et al., is successfully
                 used in many applications because it not only detects
                 densely connected nodes as clusters but also identifies
                 sparsely connected nodes as hubs or outliers. However,
                 it is difficult to apply SCAN to large-scale graphs due
                 to its high time complexity. This is because it
                 evaluates the density for all adjacent nodes included
                 in the given graphs. In this paper, we propose a novel
                 graph clustering algorithm named SCAN++. In order to
                 reduce time complexity, we introduce a new data structure
                 of directly two-hop-away reachable node set (DTAR).
                 DTAR is the set of two-hop-away nodes from a given node
                 that are likely to be in the same cluster as the given
                 node. SCAN++ employs two approaches for efficient
                 clustering by using DTARs without sacrificing
                 clustering quality. First, it reduces the number of the
                 density evaluations by computing the density only for
                 the adjacent nodes as indicated by DTARs. Second,
                 by sharing a part of the density evaluations for DTARs,
                 it offers efficient density evaluations of adjacent
                 nodes. As a result, SCAN++ detects exactly the same
                 clusters, hubs, and outliers from large-scale graphs as
                 SCAN with much shorter computation time. Extensive
                 experiments on both real-world and synthetic graphs
                 demonstrate the performance superiority of SCAN++ over
                 existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Faleiro:2015:RSM,
  author =       "Jose M. Faleiro and Daniel J. Abadi",
  title =        "Rethinking serializable multiversion concurrency
                 control",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1190--1201",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809981",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multi-versioned database systems have the potential to
                 significantly increase the amount of concurrency in
                 transaction processing because they can avoid
                 read-write conflicts. Unfortunately, the increase in
                 concurrency usually comes at the cost of transaction
                 serializability. If a database user requests full
                 serializability, modern multi-versioned systems
                 significantly constrain read-write concurrency among
                 conflicting transactions and employ expensive
                 synchronization patterns in their design. In
                 main-memory multi-core settings, these additional
                 constraints are so burdensome that multi-versioned
                 systems are often significantly outperformed by
                 single-version systems. We propose Bohm, a new
                 concurrency control protocol for main-memory
                 multi-versioned database systems. Bohm guarantees
                 serializable execution while ensuring that reads never
                 block writes. In addition, Bohm does not require reads
                 to perform any bookkeeping whatsoever, thereby avoiding
                 the overhead of tracking reads via contended writes to
                 shared memory. This leads to excellent scalability and
                 performance in multi-core settings. Bohm has all the
                 above characteristics without performing validation
                 based concurrency control. Instead, it is pessimistic,
                 and is therefore not prone to excessive aborts in the
                 presence of contention. An experimental evaluation
                 shows that Bohm performs well in both high contention
                 and low contention settings, and is able to
                 dramatically outperform state-of-the-art
                 multi-versioned systems despite maintaining the full
                 set of serializability guarantees.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Brancotte:2015:RAT,
  author =       "Bryan Brancotte and Bo Yang and Guillaume Blin and
                 Sarah Cohen-Boulakia and Alain Denise and Sylvie
                 Hamel",
  title =        "Rank aggregation with ties: experiments and analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1202--1213",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809982",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of aggregating multiple rankings into one
                 consensus ranking is an active research topic
                 especially in the database community. Various studies
                 have implemented methods for rank aggregation and may
                 have come up with contradicting conclusions upon which
                 algorithms work best. Comparing such results is
                 cumbersome, as the original studies mixed different
                 approaches and used very different evaluation datasets
                 and metrics. Additionally, in real applications, the
                 rankings to be aggregated may not be permutations where
                 elements are strictly ordered, but they may have ties
                 where some elements are placed at the same position.
                 However, most of the studies have not considered ties.
                 This paper introduces the first large scale study of
                 algorithms for rank aggregation with ties. More
                 precisely, (i) we review rank aggregation algorithms
                 and determine whether or not they can handle ties; (ii)
                 we propose the first implementation to compute the
                 exact solution of the Rank Aggregation with ties
                 problem; (iii) we evaluate algorithms for rank
                 aggregation with ties on a very large panel of both
                 real and carefully generated synthetic datasets; (iv)
                 we provide guidance on the algorithms to be favored
                 depending on dataset features.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sundaram:2015:GHP,
  author =       "Narayanan Sundaram and Nadathur Satish and Md Mostofa
                 Ali Patwary and Subramanya R. Dulloor and Michael J.
                 Anderson and Satya Gautam Vadlamudi and Dipankar Das
                 and Pradeep Dubey",
  title =        "{GraphMat}: high performance graph analytics made
                 productive",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1214--1225",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809983",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given the growing importance of large-scale graph
                 analytics, there is a need to improve the performance
                 of graph analysis frameworks without compromising on
                 productivity. GraphMat is our solution to bridge this
                 gap between a user-friendly graph analytics framework
                 and native, hand-optimized code. GraphMat functions by
                 taking vertex programs and mapping them to high
                 performance sparse matrix operations in the backend. We
                 thus get the productivity benefits of a vertex
                 programming framework without sacrificing performance.
                 GraphMat is a single-node multicore graph framework
                 written in C++ which has enabled us to write a diverse
                 set of graph algorithms with the same effort compared
                 to other vertex programming frameworks. GraphMat
                 performs 1.1-7X faster than high performance frameworks
                 such as GraphLab, CombBLAS and Galois. GraphMat also
                 matches the performance of MapGraph, a GPU-based graph
                 framework, despite running on a CPU platform with
                 significantly lower compute and bandwidth resources. It
                 achieves better multicore scalability (13-15X on 24
                 cores) than other frameworks and is 1.2X off native,
                 hand-optimized code on a variety of graph algorithms.
                 Since GraphMat performance depends mainly on a few
                 scalable and well-understood sparse matrix operations,
                 GraphMat can naturally benefit from the trend of
                 increasing parallelism in future hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2015:MKC,
  author =       "Kai Zhang and Kaibo Wang and Yuan Yuan and Lei Guo and
                 Rubao Lee and Xiaodong Zhang",
  title =        "{Mega-KV}: a case for {GPUs} to maximize the
                 throughput of in-memory key--value stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1226--1237",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809984",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In-memory key--value stores play a critical role in
                 data processing to provide high throughput and low
                 latency data accesses. In-memory key--value stores have
                 several unique properties that include (1) data
                 intensive operations demanding high memory bandwidth
                 for fast data accesses, (2) high data parallelism and
                 simple computing operations demanding many slim
                 parallel computing units, and (3) a large working set.
                 As data volume continues to increase, our experiments
                 show that conventional and general-purpose multicore
                 systems are increasingly mismatched to the special
                 properties of key--value stores because they do not
                 provide massive data parallelism and high memory
                 bandwidth; the powerful but the limited number of
                 computing cores do not satisfy the demand of the unique
                 data processing task; and the cache hierarchy may not
                 well benefit the large working set. In this paper,
                 we make a strong case for GPUs to serve as
                 special-purpose devices to greatly accelerate the
                 operations of in-memory key--value stores.
                 Specifically, we present the design and implementation
                 of Mega-KV, a GPU-based in-memory key--value store
                 system that achieves high performance and high
                 throughput. Effectively utilizing the high memory
                 bandwidth and latency hiding capability of GPUs,
                 Mega-KV provides fast data accesses and significantly
                 boosts overall performance. Running on a commodity PC
                 installed with two CPUs and two GPUs, Mega-KV can
                 process up to 160+ million key--value operations per
                 second, which is 1.4-2.8 times as fast as the
                 state-of-the-art key--value store system on a
                 conventional CPU-based platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kim:2015:TSI,
  author =       "Jinha Kim and Hyungyu Shin and Wook-Shin Han and
                 Sungpack Hong and Hassan Chafi",
  title =        "Taming subgraph isomorphism for {RDF} query
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1238--1249",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809985",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "RDF data are used to model knowledge in various areas
                 such as life sciences, Semantic Web, bioinformatics,
                 and social graphs. The size of real RDF data reaches
                 billions of triples. This calls for a framework for
                 efficiently processing RDF data. The core function of
                 processing RDF data is subgraph pattern matching. There
                 have been two completely different directions for
                 supporting efficient subgraph pattern matching. One
                 direction is to develop specialized RDF query
                 processing engines exploiting the properties of RDF
                 data for the last decade, while the other direction is
                 to develop efficient subgraph isomorphism algorithms
                 for general, labeled graphs for over 30 years. Although
                 both directions have a similar goal (i.e., finding
                 subgraphs in data graphs for a given query graph), they
                 have been independently researched without clear
                 reason. We argue that a subgraph isomorphism algorithm
                 can be easily modified to handle the graph
                 homomorphism, which is the RDF pattern matching
                 semantics, by just removing the injectivity constraint.
                 In this paper, based on the state-of-the-art subgraph
                 isomorphism algorithm, we propose an in-memory
                 solution, Turbo$_{HOM + +}$, which is tamed for the RDF
                 processing, and we compare it with the representative
                 RDF processing engines for several RDF benchmarks in a
                 server machine where billions of triples can be loaded
                 in memory. In order to speed up Turbo$_{HOM + +}$, we
                 also provide a simple yet effective transformation and
                 a series of optimization techniques. Extensive
                 experiments using several RDF benchmarks show that
                 Turbo$_{HOM + +}$ consistently and significantly
                 outperforms the representative RDF engines.
                 Specifically, Turbo$_{HOM + +}$ outperforms its
                 competitors by up to five orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2015:SPI,
  author =       "Lilong Jiang and Arnab Nandi",
  title =        "{SnapToQuery}: providing interactive feedback during
                 exploratory query specification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1250--1261",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809986",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A critical challenge in the data exploration process
                 is discovering and issuing the ``right'' query,
                 especially when the space of possible queries is large.
                 This problem of exploratory query specification is
                 exacerbated by the use of interactive user interfaces
                 driven by mouse, touch, or next-generation,
                 three-dimensional, motion capture-based devices, which
                 are often imprecise due to jitter and sensitivity
                 issues. In this paper, we propose SnapToQuery, a novel
                 technique that guides users through the query space by
                 providing interactive feedback during the query
                 specification process by ``snapping'' to the user's
                 likely intended queries. These intended queries can be
                 derived from prior query logs, or from the data itself,
                 using methods described in this paper. In order to
                 provide interactive response times over large datasets,
                 we propose two data reduction techniques when snapping
                 to these queries. Performance experiments demonstrate
                 that our algorithms help maintain an interactive
                 experience while allowing for accurate guidance. User
                 studies over three kinds of devices (mouse, touch, and
                 motion capture) show that SnapToQuery can help users
                 specify queries quicker and more accurately; resulting
                 in a query specification time speedup of $ 1.4 \times $
                 for mouse and touch-based devices and $ 2.2 \times $
                 for motion capture-based devices.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2015:GFI,
  author =       "Yang Zhou and Ling Liu and Kisung Lee and Qi Zhang",
  title =        "{GraphTwist}: fast iterative graph computation with
                 two-tier optimizations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1262--1273",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809987",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale real-world graphs are known to have highly
                 skewed vertex degree distribution and highly skewed
                 edge weight distribution. Existing vertex-centric
                 iterative graph computation models suffer from a number
                 of serious problems: (1) poor performance of parallel
                 execution due to inherent workload imbalance at vertex
                 level; (2) inefficient CPU resource utilization due to
                 short execution time for low-degree vertices compared
                 to the cost of in-memory or on-disk vertex access; and
                 (3) incapability of pruning insignificant vertices or
                 edges to improve the computational performance. In this
                 paper, we address the above technical challenges by
                 designing and implementing a scalable, efficient, and
                 provably correct two-tier graph parallel processing
                 system, GraphTwist. At storage and access tier,
                 GraphTwist maximizes parallel efficiency by employing
                 three graph parallel abstractions for partitioning a
                 big graph by slice, strip or dice based partitioning
                 techniques. At computation tier, GraphTwist presents
                 two utility-aware pruning strategies: slice pruning and
                 cut pruning, to further improve the computational
                 performance while preserving the computational utility
                 defined by graph applications. Theoretic analysis is
                 provided to quantitatively prove that iterative graph
                 computations powered by utility-aware pruning
                 techniques can achieve a very good approximation with
                 bounds on the introduced error.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Inoue:2015:SCF,
  author =       "Hiroshi Inoue and Kenjiro Taura",
  title =        "{SIMD}- and cache-friendly algorithm for sorting an
                 array of structures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1274--1285",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809988",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper describes our new algorithm for sorting an
                 array of structures by efficiently exploiting the SIMD
                 instructions and cache memory of today's processors.
                 Recently, multiway mergesort implemented with SIMD
                 instructions has been used as a high-performance
                 in-memory sorting algorithm for sorting integer values.
                 For sorting an array of structures with SIMD
                 instructions, a frequently used approach is to first
                 pack the key and index for each record into an integer
                 value, sort the key-index pairs using SIMD
                 instructions, then rearrange the records based on the
                 sorted key-index pairs. This approach can efficiently
                 exploit SIMD instructions because it sorts the
                 key-index pairs while packed into integer values;
                 hence, it can use existing high-performance sorting
                 implementations of the SIMD-based multiway mergesort
                 for integers. However, this approach has frequent cache
                 misses in the final rearranging phase due to its random
                 and scattered memory accesses so that this phase limits
                 both single-thread performance and scalability with
                 multiple cores. Our approach is also based on multiway
                 mergesort, but it can avoid costly random accesses for
                 rearranging the records while still efficiently
                 exploiting the SIMD instructions. Our results showed
                 that our approach exhibited up to 2.1x better
                 single-thread performance than the key-index approach
                 implemented with SIMD instructions when sorting 512M
                 16-byte records on one core. Our approach also yielded
                 better performance when we used multiple cores.
                 Compared to an optimized radix sort, our vectorized
                 multiway mergesort achieved better performance when
                 each record is large. Our vectorized multiway mergesort
                 also yielded higher scalability with multiple cores
                 than the radix sort.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Song:2015:EDI,
  author =       "Shaoxu Song and Aoqian Zhang and Lei Chen and Jianmin
                 Wang",
  title =        "Enriching data imputation with extensive similarity
                 neighbors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1286--1297",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809989",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Incomplete information often occurs along with many
                 database applications, e.g., in data integration, data
                 cleaning or data exchange. The idea of data imputation
                 is to fill the missing data with the values of its
                 neighbors who share the same information. Such
                 neighbors could either be identified certainly by
                 editing rules or statistically by relational dependency
                 networks. Unfortunately, owing to data sparsity, the
                 number of neighbors (identified w.r.t. value equality)
                 is rather limited, especially in the presence of data
                 values with variances. In this paper, we argue to
                 extensively enrich similarity neighbors by similarity
                 rules with tolerance to small variations. More fillings
                 can thus be acquired that the aforesaid equality
                 neighbors fail to reveal. To fill the missing values
                 more, we study the problem of maximizing the missing
                 data imputation. Our major contributions include (1)
                 the NP-hardness analysis on solving and approximating
                 the problem, (2) exact algorithms for tackling the
                 problem, and (3) efficient approximation with
                 performance guarantees. Experiments on real and
                 synthetic data sets demonstrate that the filling
                 accuracy can be improved.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Makreshanski:2015:LSE,
  author =       "Darko Makreshanski and Justin Levandoski and Ryan
                 Stutsman",
  title =        "To lock, swap, or elide: on the interplay of hardware
                 transactional memory and lock-free indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1298--1309",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809990",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The release of hardware transactional memory (HTM) in
                 commodity CPUs has major implications on the design and
                 implementation of main-memory databases, especially on
                 the architecture of high-performance lock-free indexing
                 methods at the core of several of these systems. This
                 paper studies the interplay of HTM and lock-free
                 indexing methods. First, we evaluate whether HTM will
                 obviate the need for crafty lock-free index designs by
                 integrating it in a traditional B-tree architecture.
                 HTM performs well for simple data sets with small
                 fixed-length keys and payloads, but its benefits
                 disappear for more complex scenarios (e.g., larger
                 variable-length keys and payloads), making it
                 unattractive as a general solution for achieving high
                 performance. Second, we explore fundamental differences
                 between HTM-based and lock-free B-tree designs. While
                 lock-freedom entails design complexity and extra
                 mechanism, it has performance advantages in several
                 scenarios, especially high-contention cases where
                 readers proceed uncontested (whereas HTM aborts
                 readers). Finally, we explore the use of HTM as a
                 method to simplify lock-free design. We find that using
                 HTM to implement a multi-word compare-and-swap greatly
                 reduces lock-free programming complexity at the cost of
                 only a 10--15\% performance degradation. Our study uses
                 two state-of-the-art index implementations: a
                 memory-optimized B-tree extended with HTM to provide
                 multi-threaded concurrency and the Bw-tree lock-free
                 B-tree used in several Microsoft production
                 environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shin:2015:IKB,
  author =       "Jaeho Shin and Sen Wu and Feiran Wang and Christopher
                 {De Sa} and Ce Zhang and Christopher R{\'e}",
  title =        "Incremental knowledge base construction using
                 {DeepDive}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1310--1321",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809991",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Populating a database with unstructured information is
                 a long-standing problem in industry and research that
                 encompasses problems of extraction, cleaning, and
                 integration. Recent names used for this problem include
                 dealing with dark data and knowledge base construction
                 (KBC). In this work, we describe DeepDive, a system
                 that combines database and machine learning ideas to
                 help develop KBC systems, and we present techniques to
                 make the KBC process more efficient. We observe that
                 the KBC process is iterative, and we develop techniques
                 to incrementally produce inference results for KBC
                 systems. We propose two methods for incremental
                 inference, based respectively on sampling and
                 variational techniques. We also study the tradeoff
                 space of these methods and develop a simple rule-based
                 optimizer. DeepDive includes all of these
                 contributions, and we evaluate DeepDive on five KBC
                 systems, showing that it can speed up KBC inference
                 tasks by up to two orders of magnitude with negligible
                 impact on quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qian:2015:LUP,
  author =       "Li Qian and Jinyang Gao and H. V. Jagadish",
  title =        "Learning user preferences by adaptive pairwise
                 comparison",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "11",
  pages =        "1322--1333",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2809974.2809992",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 30 16:13:08 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Users make choices among multi-attribute objects in a
                 data set in a variety of domains including used car
                 purchase, job search and hotel room booking. Individual
                 users sometimes have strong preferences between
                 objects, but these preferences may not be universally
                 shared by all users. If we can cast these preferences
                 as derived from a quantitative user-specific preference
                 function, then we can predict user preferences by
                 learning their preference function, even though the
                 preference function itself is not directly observable,
                 and may be hard to express. In this paper we study the
                 problem of preference learning with pairwise
                 comparisons on a set of entities with multiple
                 attributes. We formalize the problem into two
                 subproblems, namely preference estimation and
                 comparison selection. We propose an innovative approach
                 to estimate the preference, and introduce a binary
                 search strategy to adaptively select the comparisons.
                 We introduce the concept of an orthogonal query to
                 support this adaptive selection, as well as a novel
                 S-tree index to enable efficient evaluation of
                 orthogonal queries. We integrate these components into
                 a system for inferring user preference with adaptive
                 pairwise comparisons. Our experiments and user study
                 demonstrate that our adaptive system significantly
                 outperforms the na{\"\i}ve random selection system on
                 both real data and synthetic data, with either
                 simulated or real user feedback. We also show our
                 preference learning approach is much more effective
                 than existing approaches, and our S-tree can be
                 constructed efficiently and perform orthogonal query at
                 interactive speeds.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2015:AEL,
  author =       "Weimo Liu and Md Farhadur Rahman and Saravanan
                 Thirumuruganathan and Nan Zhang and Gautam Das",
  title =        "Aggregate estimations over location based services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1334--1345",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824034",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Location based services (LBS) have become very popular
                 in recent years. They range from map services (e.g.,
                 Google Maps) that store geographic locations of points
                 of interests, to online social networks (e.g., WeChat,
                 Sina Weibo, FourSquare) that leverage user geographic
                 locations to enable various recommendation functions.
                 The public query interfaces of these services may be
                  abstractly modeled as a $k$NN interface over a database
                  of two dimensional points on a plane: given an
                  arbitrary query point, the system returns the $k$ points
                  in the database that are nearest to the query point. In
                 this paper we consider the problem of obtaining
                 approximate estimates of SUM and COUNT aggregates by
                 only querying such databases via their restrictive
                 public interfaces. We distinguish between interfaces
                 that return location information of the returned tuples
                 (e.g., Google Maps), and interfaces that do not return
                 location information (e.g., Sina Weibo). For both types
                 of interfaces, we develop aggregate estimation
                 algorithms that are based on novel techniques for
                 precisely computing or approximately estimating the
                 Voronoi cell of tuples. We discuss a comprehensive set
                 of real-world experiments for testing our algorithms,
                 including experiments on Google Maps, WeChat, and Sina
                 Weibo.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bhattacherjee:2015:PDV,
  author =       "Souvik Bhattacherjee and Amit Chavan and Silu Huang
                 and Amol Deshpande and Aditya Parameswaran",
  title =        "Principles of dataset versioning: exploring the
                 recreation\slash storage tradeoff",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1346--1357",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824035",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The relative ease of collaborative data science and
                 analysis has led to a proliferation of many thousands
                 or millions of versions of the same datasets in many
                 scientific and commercial domains, acquired or
                 constructed at various stages of data analysis across
                 many users, and often over long periods of time.
                 Managing, storing, and recreating these dataset
                 versions is a non-trivial task. The fundamental
                 challenge here is the storage-recreation trade-off: the
                 more storage we use, the faster it is to recreate or
                 retrieve versions, while the less storage we use, the
                 slower it is to recreate or retrieve versions. Despite
                 the fundamental nature of this problem, there has been
                 a surprisingly little amount of work on it. In this
                 paper, we study this trade-off in a principled manner:
                 we formulate six problems under various settings,
                 trading off these quantities in various ways,
                 demonstrate that most of the problems are intractable,
                 and propose a suite of inexpensive heuristics drawing
                 from techniques in delay-constrained scheduling, and
                 spanning tree literature, to solve these problems. We
                 have built a prototype version management system, that
                  aims to serve as a foundation to our DataHub system
                 for facilitating collaborative data science. We
                 demonstrate, via extensive experiments, that our
                 proposed heuristics provide efficient solutions in
                 practical dataset versioning scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2015:SJJ,
  author =       "Yeye He and Kris Ganjam and Xu Chu",
  title =        "{SEMA--JOIN}: joining semantically-related tables
                 using big table corpora",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1358--1369",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824036",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Join is a powerful operator that combines records from
                 two or more tables, which is of fundamental importance
                 in the field of relational database. However,
                 traditional join processing mostly relies on string
                 equality comparisons. Given the growing demand for
                 ad-hoc data analysis, we have seen an increasing number
                 of scenarios where the desired join relationship is not
                 equi-join. For example, in a spreadsheet environment, a
                 user may want to join one table with a subject column
                 country-name, with another table with a subject column
                 country-code. Traditional equi-join cannot handle such
                 joins automatically, and the user typically has to
                 manually find an intermediate mapping table in order to
                 perform the desired join. We develop a SEMA-JOIN
                 approach that is a first step toward allowing users to
                 perform semantic join automatically, with a click of
                 the button. Our main idea is to utilize a data-driven
                 method that leverages a big table corpus with over 100
                 million tables to determine statistical correlation
                 between cell values at both row-level and column-level.
                 We use the intuition that the correct join mapping is
                 the one that maximizes aggregate pairwise correlation,
                 to formulate the join prediction problem as an
                 optimization problem. We develop a linear program
                 relaxation and a rounding argument to obtain a
                 2-approximation algorithm in polynomial time. Our
                 evaluation using both public tables from the Web and
                 proprietary Enterprise tables from a large company
                 shows that the proposed approach can perform automatic
                 semantic joins with high precision for a variety of
                 common join scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Krishnan:2015:SVC,
  author =       "Sanjay Krishnan and Jiannan Wang and Michael J.
                 Franklin and Ken Goldberg and Tim Kraska",
  title =        "Stale view cleaning: getting fresh answers from stale
                 materialized views",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1370--1381",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824037",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Materialized views (MVs), stored pre-computed results,
                 are widely used to facilitate fast queries on large
                 datasets. When new records arrive at a high rate, it is
                 infeasible to continuously update (maintain) MVs and a
                 common solution is to defer maintenance by batching
                 updates together. Between batches the MVs become
                 increasingly stale with incorrect, missing, and
                 superfluous rows leading to increasingly inaccurate
                 query results. We propose Stale View Cleaning (SVC)
                 which addresses this problem from a data cleaning
                 perspective. In SVC, we efficiently clean a sample of
                 rows from a stale MV, and use the clean sample to
                 estimate aggregate query results. While approximate,
                 the estimated query results reflect the most recent
                 data. As sampling can be sensitive to long-tailed
                 distributions, we further explore an outlier indexing
                 technique to give increased accuracy when the data
                 distributions are skewed. SVC complements existing
                 deferred maintenance approaches by giving accurate and
                 bounded query answers between maintenance. We evaluate
                 our method on a generated dataset from the TPC-D
                 benchmark and a real video distribution application.
                 Experiments confirm our theoretical results: (1)
                 cleaning an MV sample is more efficient than full view
                 maintenance, (2) the estimated results are more
                 accurate than using the stale MV, and (3) SVC is
                 applicable for a wide variety of MVs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nagarkar:2015:CSH,
  author =       "Parth Nagarkar and K. Sel{\c{c}}uk Candan and Aneesha
                 Bhat",
  title =        "Compressed spatial hierarchical bitmap {(cSHB)}
                 indexes for efficiently processing spatial range query
                 workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1382--1393",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824038",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In most spatial data management applications, objects
                 are represented in terms of their coordinates in a
                 2-dimensional space and search queries in this space
                 are processed using spatial index structures. On the
                 other hand, bitmap-based indexing, especially thanks to
                 the compression opportunities bitmaps provide, has been
                 shown to be highly effective for query processing
                 workloads including selection and aggregation
                 operations. In this paper, we show that bitmap-based
                 indexing can also be highly effective for managing
                 spatial data sets. More specifically, we propose a
                 novel compressed spatial hierarchical bitmap (cSHB)
                 index structure to support spatial range queries. We
                 consider query workloads involving multiple range
                 queries over spatial data and introduce and consider
                 the problem of bitmap selection for identifying the
                 appropriate subset of the bitmap files for processing
                 the given spatial range query workload. We develop cost
                 models for compressed domain range query processing and
                 present query planning algorithms that not only select
                 index nodes for query processing, but also associate
                 appropriate bitwise logical operations to identify the
                 data objects satisfying the range queries in the given
                 workload. Experiment results confirm the efficiency and
                 effectiveness of the proposed compressed spatial
                 hierarchical bitmap (cSHB) index structure and the
                 range query planning algorithms in supporting spatial
                 range query workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2015:SPD,
  author =       "Daniel Deutch and Amir Gilad and Yuval Moskovitch",
  title =        "Selective provenance for datalog programs using
                 top-$k$ queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1394--1405",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824039",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Highly expressive declarative languages, such as
                 datalog, are now commonly used to model the operational
                 logic of data-intensive applications. The typical
                 complexity of such datalog programs, and the large
                 volume of data that they process, call for result
                 explanation. Results may be explained through the
                 tracking and presentation of data provenance, and here
                  we focus on a detailed form of provenance
                  (how-provenance), defining it as the set of derivation
                 trees of a given fact. While informative, the size of
                 such full provenance information is typically too large
                 and complex (even when compactly represented) to allow
                 displaying it to the user. To this end, we propose a
                 novel top-$k$ query language for querying datalog
                 provenance, supporting selection criteria based on tree
                 patterns and ranking based on the rules and database
                 facts used in derivation. We propose an efficient novel
                 algorithm based on (1) instrumenting the datalog
                 program so that, upon evaluation, it generates only
                 relevant provenance, and (2) efficient top-$k$
                 (relevant) provenance generation, combined with
                 bottom-up datalog evaluation. The algorithm computes in
                 polynomial data complexity a compact representation of
                 the top-$k$ trees which may be explicitly constructed
                 in linear time with respect to their size. We further
                 experimentally study the algorithm performance, showing
                 its scalability even for complex datalog programs where
                 full provenance tracking is infeasible.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Park:2015:PPS,
  author =       "Yoonjae Park and Jun-Ki Min and Kyuseok Shim",
  title =        "Processing of probabilistic skyline queries using
                 {MapReduce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1406--1417",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824040",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There has been an increased growth in a number of
                 applications that naturally generate large volumes of
                 uncertain data. By the advent of such applications, the
                 support of advanced analysis queries such as the
                 skyline and its variant operators for big uncertain
                 data has become important. In this paper, we propose
                 the effective parallel algorithms using MapReduce to
                 process the probabilistic skyline queries for uncertain
                 data modeled by both discrete and continuous models. We
                 present three filtering methods to identify
                 probabilistic non-skyline objects in advance. We next
                 develop a single MapReduce phase algorithm PS-QP-MR by
                 utilizing space partitioning based on a variant of
                 quadtrees to distribute the instances of objects
                 effectively and the enhanced algorithm PS-QPF-MR by
                 applying the three filtering methods additionally. We
                 also propose the workload balancing technique to
                 balance the workload of reduce functions based on the
                 number of machines available. Finally, we present the
                 brute-force algorithms PS-BR-MR and PS-BRF-MR with
                 partitioning randomly and applying the filtering
                 methods. In our experiments, we demonstrate the
                 efficiency and scalability of PS-QPF-MR compared to the
                 other algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2015:BVS,
  author =       "Xiaofei Zhang and Hong Cheng and Lei Chen",
  title =        "Bonding vertex sets over distributed graph: a
                 betweenness aware approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1418--1429",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824041",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given two sets of vertices in a graph, it is often of
                 a great interest to find out how these vertices are
                 connected, especially to identify the vertices of high
                 prominence defined on the topological structure. In
                  this work, we formally define a Vertex Set Bonding
                 query (shorted as VSB), which returns a minimum set of
                 vertices with the maximum importance w.r.t total
                 betweenness and shortest path reachability in
                 connecting two sets of input vertices. We find that
                 such a kind of query is representative and could be
                 widely applied in many real world scenarios, e.g.,
                 logistic planning, social community bonding and etc.
                 Challenges are that many of such applications are
                 constructed on graphs that are too large to fit in
                 single server, and the VSB query evaluation turns to be
                 NP-hard. To cope with the scalability issue and return
                 the near optimal result in almost real time, we propose
                 a generic solution framework on a shared nothing
                 distributed environment. With the development of two
                 novel techniques, guided graph exploration and
                 betweenness ranking on exploration, we are able to
                 efficiently evaluate queries for error bounded results
                 with bounded space cost. We demonstrate the
                 effectiveness of our solution with extensive
                 experiments over both real and synthetic large graphs
                 on the Google's Cloud platform. Comparing to the
                 exploration only baseline method, our method achieves
                 several times of speedup.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amsterdamer:2015:NLI,
  author =       "Yael Amsterdamer and Anna Kukliansky and Tova Milo",
  title =        "A natural language interface for querying general and
                 individual knowledge",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1430--1441",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824042",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many real-life scenarios require the joint analysis of
                 general knowledge, which includes facts about the
                 world, with individual knowledge, which relates to the
                 opinions or habits of individuals. Recently developed
                 crowd mining platforms, which were designed for such
                 tasks, are a major step towards the solution. However,
                 these platforms require users to specify their
                 information needs in a formal, declarative language,
                 which may be too complicated for na{\"\i}ve users. To
                 make the joint analysis of general and individual
                 knowledge accessible to the public, it is desirable to
                 provide an interface that translates the user
                 questions, posed in natural language (NL), into the
                 formal query languages that crowd mining platforms
                 support. While the translation of NL questions to
                 queries over conventional databases has been studied in
                 previous work, a setting with mixed individual and
                 general knowledge raises unique challenges. In
                 particular, to support the distinct query constructs
                 associated with these two types of knowledge, the NL
                 question must be partitioned and translated using
                 different means; yet eventually all the translated
                 parts should be seamlessly combined to a well-formed
                 query. To account for these challenges, we design and
                 implement a modular translation framework that employs
                 new solutions along with state-of-the art NL parsing
                 tools. The results of our experimental study, involving
                 real user questions on various topics, demonstrate that
                 our framework provides a high-quality translation for
                 many questions that are not handled by previous
                 translation tools.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Psaroudakis:2015:SCM,
  author =       "Iraklis Psaroudakis and Tobias Scheuer and Norman May
                 and Abdelkader Sellami and Anastasia Ailamaki",
  title =        "Scaling up concurrent main-memory column-store scans:
                 towards adaptive {NUMA}-aware data and task placement",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1442--1453",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824043",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Main-memory column-stores are called to efficiently
                 use modern non-uniform memory access (NUMA)
                 architectures to service concurrent clients on big
                 data. The efficient usage of NUMA architectures depends
                 on the data placement and scheduling strategy of the
                 column-store. Most column-stores choose a static
                 strategy that involves partitioning all data across the
                 NUMA architecture, and employing a stealing-based task
                 scheduler. In this paper, we implement different
                 strategies for data placement and task scheduling for
                 the case of concurrent scans. We compare these
                 strategies with an extensive sensitivity analysis. Our
                 most significant findings include that unnecessary
                 partitioning can hurt throughput by up to 70\%, and
                 that stealing memory-intensive tasks can hurt
                 throughput by up to 58\%. Based on our analysis, we
                 envision a design that adapts the data placement and
                 task scheduling strategy to the workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Oh:2015:SOP,
  author =       "Gihwan Oh and Sangchul Kim and Sang-Won Lee and Bongki
                 Moon",
  title =        "{SQLite} optimization with phase change memory for
                 mobile applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1454--1465",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824044",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given its pervasive use in smart mobile platforms,
                 there is a compelling need to optimize the performance
                 of sluggish SQLite databases. Popular mobile
                 applications such as messenger, email and social
                 network services rely on SQLite for their data
                 management need. Those mobile applications tend to
                 execute relatively short transactions in the autocommit
                 mode for transactional consistency in databases. This
                 often has adverse effect on the flash memory storage in
                 mobile devices because the small random updates cause
                 high write amplification and high write latency. In
                 order to address this problem, we propose a new
                 optimization strategy, called per-page logging (PPL),
                 for mobile data management, and have implemented the
                 key functions in SQLite/PPL. The hardware component of
                 SQLite/PPL includes phase change memory (PCM) with a
                 byte-addressable, persistent memory abstraction. By
                 capturing an update in a physiological log record and
                 adding it to the PCM log sector, SQLite/PPL can replace
                 a multitude of successive page writes made to the same
                 logical page with much smaller log writes done to PCM
                 much more efficiently. We have observed that SQLite/PPL
                 would potentially improve the performance of mobile
                 applications by an order of magnitude while supporting
                 transactional atomicity and durability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Crotty:2015:ACU,
  author =       "Andrew Crotty and Alex Galakatos and Kayhan Dursun and
                 Tim Kraska and Carsten Binnig and Ugur Cetintemel and
                 Stan Zdonik",
  title =        "An architecture for compiling {UDF}-centric
                 workflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1466--1477",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824045",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data analytics has recently grown to include
                 increasingly sophisticated techniques, such as machine
                 learning and advanced statistics. Users frequently
                 express these complex analytics tasks as workflows of
                 user-defined functions (UDFs) that specify each
                 algorithmic step. However, given typical hardware
                 configurations and dataset sizes, the core challenge of
                 complex analytics is no longer sheer data volume but
                 rather the computation itself, and the next generation
                 of analytics frameworks must focus on optimizing for
                 this computation bottleneck. While query compilation
                 has gained widespread popularity as a way to tackle the
                 computation bottleneck for traditional SQL workloads,
                 relatively little work addresses UDF-centric workflows
                 in the domain of complex analytics. In this paper, we
                 describe a novel architecture for automatically
                 compiling workflows of UDFs. We also propose several
                 optimizations that consider properties of the data,
                 UDFs, and hardware together in order to generate
                 different code on a case-by-case basis. To evaluate our
                 approach, we implemented these techniques in Tupleware,
                 a new high-performance distributed analytics system,
                 and our benchmarks show performance improvements of up
                 to three orders of magnitude compared to alternative
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Margo:2015:SDG,
  author =       "Daniel Margo and Margo Seltzer",
  title =        "A scalable distributed graph partitioner",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1478--1489",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824046",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present Scalable Host-tree Embeddings for Efficient
                 Partitioning (Sheep), a distributed graph partitioning
                 algorithm capable of handling graphs that far exceed
                 main memory. Sheep produces high quality edge
                 partitions an order of magnitude faster than both state
                 of the art offline (e.g., METIS) and streaming
                 partitioners (e.g., Fennel). Sheep's partitions are
                 independent of the input graph distribution, which
                 means that graph elements can be assigned to processing
                 nodes arbitrarily without affecting the partition
                 quality. Sheep transforms the input graph into a
                 strictly smaller elimination tree via a distributed
                 map-reduce operation. By partitioning this tree, Sheep
                 finds an upper-bounded communication volume
                 partitioning of the original graph. We describe the
                 Sheep algorithm and analyze its space-time
                 requirements, partition quality, and intuitive
                 characteristics and limitations. We compare Sheep to
                 contemporary partitioners and demonstrate that Sheep
                 creates competitive partitions, scales to larger
                 graphs, and has better runtime.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sharov:2015:TMY,
  author =       "Artyom Sharov and Alexander Shraer and Arif Merchant
                 and Murray Stokely",
  title =        "Take me to your leader!: online optimization of
                 distributed storage configurations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1490--1501",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824047",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The configuration of a distributed storage system
                 typically includes, among other parameters, the set of
                 servers and their roles in the replication protocol.
                 Although mechanisms for changing the configuration at
                 runtime exist, it is usually left to system
                 administrators to manually determine the ``best''
                 configuration and periodically reconfigure the system,
                 often by trial and error. This paper describes a new
                 workload-driven optimization framework that dynamically
                 determines the optimal configuration at run-time. We
                 focus on optimizing leader and quorum based replication
                 schemes and divide the framework into three
                 optimization tiers, dynamically optimizing different
                 configuration aspects: (1) leader placement, (2) roles
                 of different servers in the replication protocol, and
                 (3) replica locations. We showcase our optimization
                 framework by applying it to a large-scale distributed
                 storage system used internally in Google and
                 demonstrate that most client applications significantly
                 benefit from using our framework, reducing average
                 operation latency by up to 94\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2015:ARG,
  author =       "Wenfei Fan and Xin Wang and Yinghui Wu and Jingbo Xu",
  title =        "Association rules with graph patterns",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1502--1513",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824048",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose graph-pattern association rules (GPARs) for
                 social media marketing. Extending association rules for
                 item-sets, GPARs help us discover regularities between
                 entities in social graphs, and identify potential
                 customers by exploring social influence. We study the
                 problem of discovering top-$k$ diversified GPARs. While
                 this problem is NP-hard, we develop a parallel
                 algorithm with accuracy bound. We also study the
                 problem of identifying potential customers with GPARs.
                 While it is also NP-hard, we provide a parallel
                 scalable algorithm that guarantees a polynomial speedup
                 over sequential algorithms with the increase of
                 processors. Using real-life and synthetic graphs, we
                 experimentally verify the scalability and effectiveness
                 of the algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kimmett:2015:FJM,
  author =       "Ben Kimmett and Venkatesh Srinivasan and Alex Thomo",
  title =        "Fuzzy joins in {MapReduce}: an experimental study",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1514--1517",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824049",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We report experimental results for the MapReduce
                 algorithms proposed by Afrati, Das Sarma, Menestrina,
                 Parameswaran and Ullman in ICDE'12 to compute fuzzy
                 joins of binary strings using Hamming Distance. Their
                 algorithms come with complete theoretical analysis,
                 however, no experimental evaluation is provided. They
                 argue that there is a tradeoff between communication
                 cost and processing cost, and that there is a skyline
                 of the proposed algorithms; i.e. none dominates
                 another. We observe via experiments that, from a
                 practical point of view, some algorithms are almost
                 always preferable to others. We provide detailed
                 experimental results and insights that show the
                 different facets of each algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cho:2015:PEP,
  author =       "Minsik Cho and Daniel Brand and Rajesh Bordawekar and
                 Ulrich Finkler and Vincent Kulandaisamy and Ruchir
                 Puri",
  title =        "{PARADIS}: an efficient parallel algorithm for
                 in-place radix sort",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1518--1529",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824050",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In-place radix sort is a popular distribution-based
                 sorting algorithm for short numeric or string keys due
                 to its linear run-time and constant memory complexity.
                 However, efficient parallelization of in-place radix
                 sort is very challenging for two reasons. First, the
                 initial phase of permuting elements into buckets
                 suffers read-write dependency inherent in its in-place
                 nature. Secondly, load balancing of the recursive
                 application of the algorithm to the resulting buckets
                 is difficult when the buckets are of very different
                 sizes, which happens for skewed distributions of the
                 input data. In this paper, we present a novel parallel
                 in-place radix sort algorithm, PARADIS, which addresses
                 both problems: (a) ``speculative permutation'' solves
                 the first problem by assigning multiple non-continuous
                 array stripes to each processor. The resulting
                 shared-nothing scheme achieves full parallelization.
                 Since our speculative permutation is not complete, it
                 is followed by a ``repair'' phase, which can again be
                 done in parallel without any data sharing among the
                 processors. (b) ``distribution-adaptive load
                 balancing'' solves the second problem. We dynamically
                 allocate processors in the context of radix sort, so as
                 to minimize the overall completion time. Our
                 experimental results show that PARADIS offers excellent
                 performance/scalability on a wide range of input data
                 sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vengerov:2015:JSE,
  author =       "David Vengerov and Andre Cavalheiro Menck and Mohamed
                 Zait and Sunil P. Chakkappen",
  title =        "Join size estimation subject to filter conditions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1530--1541",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824051",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we present a new algorithm for
                 estimating the size of equality join of multiple
                 database tables. The proposed algorithm, Correlated
                 Sampling, constructs a small space synopsis for each
                 table, which can then be used to provide a quick
                 estimate of the join size of this table with other
                 tables subject to dynamically specified predicate
                 filter conditions, possibly specified over multiple
                 columns (attributes) of each table. This algorithm
                 makes a single pass over the data and is thus suitable
                 for streaming scenarios. We compare this algorithm
                 analytically to two other previously known sampling
                 approaches (independent Bernoulli Sampling and
                 End-Biased Sampling) and to a novel sketch-based
                 approach. We also compare these four algorithms
                 experimentally and show that results fully correspond
                 to our analytical predictions based on derived
                 expressions for the estimator variances, with
                 Correlated Sampling giving the best estimates in a
                 large range of situations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2015:AFT,
  author =       "Jingjing Wang and Magdalena Balazinska and Daniel
                 Halperin",
  title =        "Asynchronous and fault-tolerant recursive datalog
                 evaluation in shared-nothing engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1542--1553",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824052",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a new approach for data analytics with
                 iterations. Users express their analysis in Datalog
                 with bag-monotonic aggregate operators, which enables
                 the expression of computations from a broad variety of
                 application domains. Queries are translated into query
                 plans that can execute in shared-nothing engines, are
                 incremental, and support a variety of iterative models
                 (synchronous, asynchronous, different processing
                 priorities) and failure-handling techniques. The plans
                 require only small extensions to an existing
                 shared-nothing engine, making the approach easily
                 implementable. We implement the approach in the Myria
                 big-data management system and use our implementation
                 to empirically study the performance characteristics of
                 different combinations of iterative models, failure
                 handling methods, and applications. Our evaluation uses
                 workloads from a variety of application domains. We
                 find that no single method outperforms others but
                 rather that application properties must drive the
                 selection of the iterative query execution model.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mouratidis:2015:MRQ,
  author =       "Kyriakos Mouratidis and Jilian Zhang and HweeHwa
                 Pang",
  title =        "Maximum rank query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1554--1565",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824053",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The top-$k$ query is a common means to shortlist a
                 number of options from a set of alternatives, based on
                 the user's preferences. Typically, these preferences
                 are expressed as a vector of query weights, defined
                 over the options' attributes. The query vector
                 implicitly associates each alternative with a numeric
                 score, and thus imposes a ranking among them. The
                 top-$k$ result includes the $k$ options with the highest
                 scores. In this context, we define the maximum rank
                 query (MaxRank). Given a focal option in a set of
                 alternatives, the MaxRank problem is to compute the
                 highest rank this option may achieve under any possible
                 user preference, and furthermore, to report all the
                 regions in the query vector's domain where that rank is
                 achieved. MaxRank finds application in market impact
                 analysis, customer profiling, targeted advertising,
                 etc. We propose a methodology for MaxRank processing
                 and evaluate it with experiments on real and benchmark
                 synthetic datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Katsarou:2015:PSI,
  author =       "Foteini Katsarou and Nikos Ntarmos and Peter
                 Triantafillou",
  title =        "Performance and scalability of indexed subgraph query
                 processing methods",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1566--1577",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824054",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph data management systems have become very popular
                 as graphs are the natural data model for many
                 applications. One of the main problems addressed by
                 these systems is subgraph query processing; i.e., given
                 a query graph, return all graphs that contain the
                 query. The naive method for processing such queries is
                 to perform a subgraph isomorphism test against each
                 graph in the dataset. This obviously does not scale, as
                 subgraph isomorphism is NP-Complete. Thus, many
                 indexing methods have been proposed to reduce the
                 number of candidate graphs that have to undergo the
                 subgraph isomorphism test. In this paper, we identify a
                 set of key factors-parameters, that influence the
                 performance of related methods: namely, the number of
                 nodes per graph, the graph density, the number of
                 distinct labels, the number of graphs in the dataset,
                 and the query graph size. We then conduct comprehensive
                 and systematic experiments that analyze the sensitivity
                 of the various methods on the values of the key
                 parameters. Our aims are twofold: first to derive
                 conclusions about the algorithms' relative performance,
                 and, second, to stress-test all algorithms, deriving
                 insights as to their scalability, and highlight how
                 both performance and scalability depend on the above
                 factors. We choose six well-established indexing
                 methods, namely Grapes, CT-Index, GraphGrepSX, gIndex,
                 Tree+ $ \Delta $, and gCode, as representative
                 approaches of the overall design space, including the
                 most recent and best performing methods. We report on
                 their index construction time and index size, and on
                 query processing performance in terms of time and false
                 positive ratio. We employ both real and synthetic
                 datasets. Specifically, four real datasets of different
                 characteristics are used: AIDS, PDBS, PCM, and PPI. In
                 addition, we generate a large number of synthetic graph
                 datasets, empowering us to systematically study the
                 algorithms' performance and scalability versus the
                 aforementioned key parameters.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2015:LDA,
  author =       "Ying Yang and Niccol{\`o} Meneghetti and Ronny Fehling
                 and Zhen Hua Liu and Oliver Kennedy",
  title =        "Lenses: an on-demand approach to {ETL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1578--1589",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824055",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Three mentalities have emerged in analytics. One view
                 holds that reliable analytics is impossible without
                 high-quality data, and relies on heavy-duty ETL
                 processes and upfront data curation to provide it. The
                 second view takes a more ad-hoc approach, collecting
                 data into a data lake, and placing responsibility for
                 data quality on the analyst querying it. A third,
                 on-demand approach has emerged over the past decade in
                 the form of numerous systems like Paygo or HLog, which
                 allow for incremental curation of the data and help
                 analysts to make principled trade-offs between data
                 quality and effort. Though quite useful in isolation,
                 these systems target only specific quality problems
                 (e.g., Paygo targets only schema matching and entity
                 resolution). In this paper, we explore the design of a
                 general, extensible infrastructure for on-demand
                 curation that is based on probabilistic query
                 processing. We illustrate its generality through
                 examples and show how such an infrastructure can be
                 used to gracefully make existing ETL workflows
                 ``on-demand''. Finally, we present a user interface for
                 On-Demand ETL and address ensuing challenges, including
                 that of efficiently ranking potential data curation
                 tasks. Our experimental results show that On-Demand ETL
                 is feasible and that our greedy ranking strategy for
                 curation tasks, called CPI, is effective.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2015:KG,
  author =       "Wenfei Fan and Zhe Fan and Chao Tian and Xin Luna
                 Dong",
  title =        "Keys for graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1590--1601",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824056",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Keys for graphs aim to uniquely identify entities
                 represented by vertices in a graph. We propose a class
                 of keys that are recursively defined in terms of graph
                 patterns, and are interpreted with subgraph
                 isomorphism. Extending conventional keys for relations
                 and XML, these keys find applications in object
                 identification, knowledge fusion and social network
                 reconciliation. As an application, we study the entity
                 matching problem that, given a graph $G$ and a set $
                 \Sigma $ of keys, is to find all pairs of entities
                 (vertices) in $G$ that are identified by keys in $
                 \Sigma $. We show that the problem is intractable, and
                 cannot be parallelized in logarithmic rounds.
                 Nonetheless, we provide two parallel scalable
                 algorithms for entity matching, in MapReduce and a
                 vertex-centric asynchronous model. Using real-life and
                 synthetic data, we experimentally verify the
                 effectiveness and scalability of the algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eldawy:2015:SPT,
  author =       "Ahmed Eldawy and Louai Alarabi and Mohamed F. Mokbel",
  title =        "Spatial partitioning techniques in {SpatialHadoop}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1602--1605",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824057",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SpatialHadoop is an extended MapReduce framework that
                 supports global indexing that spatially partitions the
                 data across machines providing orders of magnitude
                 speedup, compared to traditional Hadoop. In this paper,
                 we describe seven alternative partitioning techniques
                 and experimentally study their effect on the quality of
                 the generated index and the performance of range and
                 spatial join queries. We found that using a 1\% sample
                 is enough to produce high quality partitions. Also, we
                 found that the total area of partitions is a reasonable
                 measure of the quality of indexes when running spatial
                 join. This study will assist researchers in choosing a
                 good spatial partitioning technique in distributed
                 environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Manabe:2015:ELH,
  author =       "Tomohiro Manabe and Keishi Tajima",
  title =        "Extracting logical hierarchical structure of {HTML}
                 documents based on headings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1606--1617",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824058",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose a method for extracting logical
                 hierarchical structure of HTML documents. Because
                 mark-up structure in HTML documents does not
                 necessarily coincide with logical hierarchical
                 structure, it is not trivial how to extract logical
                 structure of HTML documents. Human readers, however,
                 easily understand their logical structure. The key
                 information used by them is headings in the documents.
                 Human readers exploit the following properties of
                 headings: (1) headings appear at the beginning of the
                 corresponding blocks, (2) headings are given prominent
                 visual styles, (3) headings of the same level share the
                 same visual style, and (4) headings of higher levels
                 are given more prominent visual styles. Our method also
                 exploits these properties for extracting hierarchical
                 headings and their associated blocks. Our experiment
                 shows that our method outperforms existing methods. In
                 addition, our method extracts not only hierarchical
                 blocks but also their associated headings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Naidan:2015:PSM,
  author =       "Bilegsaikhan Naidan and Leonid Boytsov and Eric
                 Nyberg",
  title =        "Permutation search methods are efficient, yet faster
                 search is possible",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1618--1629",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824059",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We survey permutation-based methods for approximate
                 $k$-nearest neighbor search. In these methods, every data
                 point is represented by a ranked list of pivots sorted
                 by the distance to this point. Such ranked lists are
                 called permutations. The underpinning assumption is
                 that, for both metric and non-metric spaces, the
                 distance between permutations is a good proxy for the
                 distance between original points. Thus, it should be
                 possible to efficiently retrieve most true nearest
                 neighbors by examining only a tiny subset of data
                 points whose permutations are similar to the
                 permutation of a query. We further test this assumption
                 by carrying out an extensive experimental evaluation
                 where permutation methods are pitted against
                 state-of-the-art benchmarks (the multi-probe LSH, the
                 VP-tree, and proximity-graph based retrieval) on a
                 variety of realistically large data sets from the image
                 and textual domain. The focus is on the high-accuracy
                 retrieval methods for generic spaces. Additionally, we
                 assume that both data and indices are stored in main
                 memory. We find permutation methods to be reasonably
                 efficient and describe a setup where these methods are
                 most useful. To ease reproducibility, we make our
                 software and data sets publicly available.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mukherjee:2015:DAO,
  author =       "Niloy Mukherjee and Shasank Chavan and Maria Colgan
                 and Dinesh Das and Mike Gleeson and Sanket Hase and
                 Allison Holloway and Hui Jin and Jesse Kamp and Kartik
                 Kulkarni and Tirthankar Lahiri and Juan Loaiza and Neil
                 Macnaughton and Vineet Marwah and Atrayee Mullick and
                 Andy Witkowski and Jiaqi Yan and Mohamed Zait",
  title =        "Distributed architecture of {Oracle} database
                 in-memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1630--1641",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824061",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Over the last few years, the information technology
                 industry has witnessed revolutions in multiple
                 dimensions. Increasing ubiquitous sources of data have
                 posed two connected challenges to data management
                 solutions --- processing unprecedented volumes of data,
                 and providing ad-hoc real-time analysis in mainstream
                 production data stores without compromising regular
                 transactional workload performance. In parallel,
                 computer hardware systems are scaling out elastically,
                 scaling up in the number of processors and cores, and
                 increasing main memory capacity extensively. The data
                 processing challenges combined with the rapid
                 advancement of hardware systems has necessitated the
                 evolution of a new breed of main-memory databases
                 optimized for mixed OLTAP environments and designed to
                 scale. The Oracle RDBMS In-memory Option (DBIM) is an
                 industry-first distributed dual format architecture
                 that allows a database object to be stored in columnar
                 format in main memory highly optimized to break
                 performance barriers in analytic query workloads,
                 simultaneously maintaining transactional consistency
                 with the corresponding OLTP optimized row-major format
                 persisted in storage and accessed through database
                 buffer cache. In this paper, we present the
                 distributed, highly-available, and fault-tolerant
                 architecture of the Oracle DBIM that enables the RDBMS
                 to transparently scale out in a database cluster, both
                 in terms of memory capacity and query processing
                 throughput. We believe that the architecture is unique
                 among all mainstream in-memory databases. It allows
                 complete application-transparent, extremely scalable
                 and automated distribution of Oracle RDBMS objects
                 in-memory across a cluster, as well as across multiple
                 NUMA nodes within a single server. It seamlessly
                 provides distribution awareness to the Oracle SQL
                 execution framework through affinitized fault-tolerant
                 parallel execution within and across servers without
                 explicit optimizer plan changes or query rewrites.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Haas:2015:AMC,
  author =       "Daniel Haas and Jason Ansel and Lydia Gu and Adam
                 Marcus",
  title =        "{Argonaut}: macrotask crowdsourcing for complex data
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1642--1653",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824062",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Crowdsourced workflows are used in research and
                 industry to solve a variety of tasks. The databases
                 community has used crowd workers in query
                 operators/optimization and for tasks such as entity
                 resolution. Such research utilizes microtasks where
                 crowd workers are asked to answer simple yes/no or
                 multiple choice questions with little training.
                 Typically, microtasks are used with voting algorithms
                 to combine redundant responses from multiple crowd
                 workers to achieve result quality. Microtasks are
                 powerful, but fail in cases where larger context (e.g.,
                 domain knowledge) or significant time investment is
                 needed to solve a problem, for example in
                 large-document structured data extraction. In this
                 paper, we consider context-heavy data processing tasks
                 that may require many hours of work, and refer to such
                 tasks as macrotasks. Leveraging the infrastructure and
                 worker pools of existing crowdsourcing platforms, we
                 automate macrotask scheduling, evaluation, and pay
                 scales. A key challenge in macrotask-powered work,
                 however, is evaluating the quality of a worker's
                 output, since ground truth is seldom available and
                 redundancy-based quality control schemes are
                 impractical. We present Argonaut, a framework that
                 improves macrotask-powered work quality using a
                 hierarchical review. Argonaut uses a predictive model
                 of worker quality to select trusted workers to perform
                 review, and a separate predictive model of task quality
                 to decide which tasks to review. Finally, Argonaut can
                 identify the ideal trade-off between a single phase of
                 review and multiple phases of review given a
                 constrained review budget in order to maximize overall
                 output quality. We evaluate an industrial use of
                 Argonaut to power a structured data extraction pipeline
                 that has utilized over half a million hours of crowd
                 worker input to complete millions of macrotasks. We
                 show that Argonaut can capture up to 118\% more errors
                 than random spot-check reviews in review
                 budget-constrained environments with up to two review
                 layers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2015:BRL,
  author =       "Guozhang Wang and Joel Koshy and Sriram Subramanian
                 and Kartik Paramasivam and Mammad Zadeh and Neha
                 Narkhede and Jun Rao and Jay Kreps and Joe Stein",
  title =        "Building a replicated logging system with {Apache
                 Kafka}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1654--1655",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824063",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Apache Kafka is a scalable publish-subscribe messaging
                 system with its core architecture as a distributed
                 commit log. It was originally built at LinkedIn as its
                 centralized event pipelining platform for online data
                 integration tasks. Over the past years developing and
                 operating Kafka, we extend its log-structured
                 architecture as a replicated logging backbone for much
                 wider application scopes in the distributed
                 environment. In this abstract, we will talk about our
                 design and engineering experience to replicate Kafka
                 logs for various distributed data-driven systems at
                 LinkedIn, including source-of-truth data storage and
                 stream processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Loro:2015:ISH,
  author =       "Alessandra Loro and Anja Gruenheid and Donald Kossmann
                 and Damien Profeta and Philippe Beaudequin",
  title =        "Indexing and selecting hierarchical business logic",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1656--1667",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824064",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Business rule management is the task of storing and
                 maintaining company-specific decision rules and
                 business logic that is queried frequently by
                 application users. These rules can impede efficient
                 query processing when they require the business rule
                 engine to resolve semantic hierarchies. To address this
                 problem, this work discusses hierarchical indexes that
                 are performance and storage-conscious. In the first
                 part of this work, we develop a tree-based hierarchical
                 structure that represents client-defined semantic
                 hierarchies as well as two variants of this structure
                 that improve performance and main memory allocation.
                 The second part of our work focuses on selecting the
                 top rules out of those retrieved from the index. We
                 formally define a priority score-based decision scheme
                 that allows for a conflict-free rule system and
                 efficient rule ranking. Additionally, we introduce a
                 weight-based lazy merging technique for rule selection.
                 All of these techniques are evaluated with real world
                 and synthetic data sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shukla:2015:SAI,
  author =       "Dharma Shukla and Shireesh Thota and Karthik Raman and
                 Madhan Gajendran and Ankur Shah and Sergii Ziuzin and
                 Krishnan Sundaram and Miguel Gonzalez Guajardo and Anna
                 Wawrzyniak and Samer Boshra and Renato Ferreira and
                 Mohamed Nassar and Michael Koltachev and Ji Huang and
                 Sudipta Sengupta and Justin Levandoski and David
                 Lomet",
  title =        "Schema-agnostic indexing with {Azure DocumentDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1668--1679",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824065",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Azure DocumentDB is Microsoft's multi-tenant
                 distributed database service for managing JSON
                 documents at Internet scale. DocumentDB is now
                 generally available to Azure developers. In this paper,
                 we describe the DocumentDB indexing subsystem.
                 DocumentDB indexing enables automatic indexing of
                 documents without requiring a schema or secondary
                 indices. Uniquely, DocumentDB provides real-time
                 consistent queries in the face of very high rates of
                 document updates. As a multi-tenant service, DocumentDB
                 is designed to operate within extremely frugal resource
                 budgets while providing predictable performance and
                 robust resource isolation to its tenants. This paper
                 describes the DocumentDB capabilities, including
                 document representation, query language, document
                 indexing approach, core index support, and early
                 production experiences.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Boutin:2015:JRI,
  author =       "Eric Boutin and Paul Brett and Xiaoyu Chen and Jaliya
                 Ekanayake and Tao Guan and Anna Korsun and Zhicheng Yin
                 and Nan Zhang and Jingren Zhou",
  title =        "{JetScope}: reliable and interactive analytics at
                 cloud scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1680--1691",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824066",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Interactive, reliable, and rich data analytics at
                 cloud scale is a key capability to support low latency
                 data exploration and experimentation over terabytes of
                 data for a wide range of business scenarios. Besides
                 the challenges in massive scalability and low latency
                 distributed query processing, it is imperative to
                 achieve all these requirements with effective fault
                 tolerance and efficient recovery, as failures and
                 fluctuations are the norm in such a distributed
                 environment. We present a cloud scale interactive query
                 processing system, called JetScope, developed at
                 Microsoft. The system has a SQL-like declarative
                 scripting language and delivers massive scalability and
                 high performance through advanced optimizations. In
                 order to achieve low latency, the system leverages
                 various access methods, optimizes delivering first
                 rows, and maximizes network and scheduling efficiency.
                 The system also provides a fine-grained fault tolerance
                 mechanism which is able to efficiently detect and
                 mitigate failures without significantly impacting the
                 query latency and user experience. JetScope has been
                 deployed to hundreds of servers in production at
                 Microsoft, serving a few million queries every day.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hu:2015:DPT,
  author =       "Xueyang Hu and Mingxuan Yuan and Jianguo Yao and Yu
                 Deng and Lei Chen and Qiang Yang and Haibing Guan and
                 Jia Zeng",
  title =        "Differential privacy in telco big data platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1692--1703",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824067",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differential privacy (DP) has been widely explored in
                 academia recently but less so in industry possibly due
                 to its strong privacy guarantee. This paper makes the
                 first attempt to implement three basic DP architectures
                 in the deployed telecommunication (telco) big data
                 platform for data mining applications. We find that all
                 DP architectures have less than 5\% loss of prediction
                 accuracy when the weak privacy guarantee is adopted
                 (e.g., privacy budget parameter $ \epsilon \geq 3$).
                 However, when the strong privacy guarantee is assumed
                 (e.g., privacy budget parameter $ \epsilon \leq 0.1
                 $), all DP architectures lead to 15\%--30\%
                 accuracy loss, which implies that real-world industrial
                 data mining systems cannot work well under such a
                 strong privacy guarantee recommended by previous
                 research works. Among the three basic DP architectures,
                 the Hybridized DM (Data Mining) and DB (Database)
                 architecture performs the best because of its
                 complicated privacy protection design for the specific
                 data mining algorithm. Through extensive experiments on
                 big data, we also observe that the accuracy loss
                 increases by increasing the variety of features, but
                 decreases by increasing the volume of training data.
                 Therefore, to make DP practically usable in large-scale
                 industrial systems, our observations suggest that we
                 may explore three possible research directions in
                 future: (1) Relaxing the privacy guarantee (e.g.,
                 increasing privacy budget $ \epsilon $) and studying
                 its effectiveness on specific industrial applications;
                 (2) Designing specific privacy scheme for specific data
                 mining algorithms; and (3) Using large volume of data
                 but with low variety for training the classification
                 models.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{El-Helw:2015:OCT,
  author =       "Amr El-Helw and Venkatesh Raghavan and Mohamed A.
                 Soliman and George Caragea and Zhongxian Gu and
                 Michalis Petropoulos",
  title =        "Optimization of common table expressions in {MPP}
                 database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1704--1715",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824068",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big Data analytics often include complex queries with
                 similar or identical expressions, usually referred to
                 as Common Table Expressions (CTEs). CTEs may be
                 explicitly defined by users to simplify query
                 formulations, or implicitly included in queries
                 generated by business intelligence tools, financial
                 applications and decision support systems. In Massively
                 Parallel Processing (MPP) database systems, CTEs pose
                 new challenges due to the distributed nature of query
                 processing, the overwhelming volume of underlying data
                 and the scalability criteria that systems are required
                 to meet. In these settings, the effective optimization
                 and efficient execution of CTEs are crucial for the
                 timely processing of analytical queries over Big Data.
                 In this paper, we present a comprehensive framework for
                 the representation, optimization and execution of CTEs
                 in the context of Orca --- Pivotal's query optimizer
                 for Big Data. We demonstrate experimentally the
                 benefits of our techniques using industry standard
                 decision support benchmark.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Goel:2015:TSR,
  author =       "Anil K. Goel and Jeffrey Pound and Nathan Auch and
                 Peter Bumbulis and Scott MacLean and Franz F{\"a}rber
                 and Francis Gropengiesser and Christian Mathis and
                 Thomas Bodner and Wolfgang Lehner",
  title =        "Towards scalable real-time analytics: an architecture
                 for scale-out of {OLxP} workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1716--1727",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824069",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present an overview of our work on the SAP HANA
                 Scale-out Extension, a novel distributed database
                 architecture designed to support large scale analytics
                 over real-time data. This platform permits high
                 performance OLAP with massive scale-out capabilities,
                 while concurrently allowing OLTP workloads. This dual
                 capability enables analytics over real-time changing
                 data and allows fine grained user-specified service
                 level agreements (SLAs) on data freshness. We advocate
                 the decoupling of core database components such as
                 query processing, concurrency control, and persistence,
                 a design choice made possible by advances in
                 high-throughput low-latency networks and storage
                 devices. We provide full ACID guarantees and build on a
                 logical timestamp mechanism to provide MVCC-based
                 snapshot isolation, while not requiring synchronous
                 updates of replicas. Instead, we use asynchronous
                 update propagation guaranteeing consistency with
                 timestamp validation. We provide a view into the design
                 and development of a large scale data management
                 platform for real-time analytics, driven by the needs
                 of modern enterprise customers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dasu:2015:FMF,
  author =       "Tamraparni Dasu and Vladislav Shkapenyuk and Divesh
                 Srivastava and Deborah F. Swayne",
  title =        "{FIT} to monitor feed quality",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1728--1739",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824070",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While there has been significant focus on collecting
                 and managing data feeds, it is only now that attention
                 is turning to their quality. In this paper, we propose
                 a principled approach to online data quality monitoring
                 in a dynamic feed environment. Our goal is to alert
                 quickly when feed behavior deviates from expectations.
                 We make contributions in two distinct directions.
                 First, we propose novel enhancements to permit a
                 publish-subscribe approach to incorporate data quality
                 modules into the DFMS architecture. Second, we propose
                 novel temporal extensions to standard statistical
                 techniques to adapt them to online feed monitoring for
                 outlier detection and alert generation at multiple
                 scales along three dimensions: aggregation at multiple
                 time intervals to detect at varying levels of
                 sensitivity; multiple lengths of data history for
                 varying the speed at which models adapt to change; and
                 multiple levels of monitoring delay to address lagged
                 data arrival. FIT, or Feed Inspection Tool, is the
                 result of a successful implementation of our approach.
                 We present several case studies outlining the effective
                 deployment of FIT in real applications along with user
                 testimonials.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Larson:2015:RTA,
  author =       "Per-{\AA}ke Larson and Adrian Birka and Eric N. Hanson
                 and Weiyun Huang and Michal Nowakiewicz and Vassilis
                 Papadimos",
  title =        "Real-time analytical processing with {SQL} server",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1740--1751",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824071",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Over the last two releases SQL Server has integrated
                 two specialized engines into the core system: the
                 Apollo column store engine for analytical workloads and
                 the Hekaton in-memory engine for high-performance OLTP
                 workloads. There is an increasing demand for real-time
                 analytics, that is, for running analytical queries and
                 reporting on the same system as transaction processing
                 so as to have access to the freshest data. SQL Server
                 2016 will include enhancements to column store indexes
                 and in-memory tables that significantly improve
                 performance on such hybrid workloads. This paper
                 describes four such enhancements: column store indexes
                 on in-memory tables, making secondary column store
                 indexes on disk-based tables updatable, allowing B-tree
                 indexes on primary column store indexes, and further
                 speeding up the column store scan operator.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2015:EEO,
  author =       "You Wu and Boulos Harb and Jun Yang and Cong Yu",
  title =        "Efficient evaluation of object-centric exploration
                 queries for visualization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1752--1763",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824072",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The most effective way to explore data is through
                 visualizing the results of exploration queries. For
                 example, an exploration query could be an aggregate of
                 some measures over time intervals, and a pattern or
                 abnormality can be discovered through a time series
                 plot of the query results. In this paper, we examine a
                 special kind of exploration query, namely
                 object-centric exploration query. Common examples
                 include claims made about athletes in sports databases,
                 such as ``it is newsworthy that LeBron James has scored
                 35 or more points in nine consecutive games.'' We focus
                 on one common type of visualization, i.e., 2d scatter
                 plot with heatmap. Namely, we consider exploration
                 queries whose results can be plotted on a
                 two-dimensional space, possibly with colors indicating
                 object densities in regions. While we model results as
                 pairs of numbers, the types of the queries are limited
                 only by the users' imagination. In the LeBron James
                 example above, the two dimensions are minimum points
                 scored per game and number of consecutive games,
                 respectively. It is easy to find other equally
                 interesting dimensions, such as minimum rebounds per
                 game or number of playoff games. We formalize this
                 problem and propose an efficient, interactive-speed
                 algorithm that takes a user-provided exploration query
                 (which can be a blackbox function) and produces an
                 approximate visualization that preserves the two most
                 important visual properties: the outliers and the
                 overall distribution of all result points.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qiao:2015:GUD,
  author =       "Lin Qiao and Yinan Li and Sahil Takiar and Ziyang Liu
                 and Narasimha Veeramreddy and Min Tu and Ying Dai and
                 Issac Buenrostro and Kapil Surlaker and Shirshanka Das
                 and Chavdar Botev",
  title =        "{Gobblin}: unifying data ingestion for {Hadoop}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1764--1769",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824073",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data ingestion is an essential part of companies and
                 organizations that collect and analyze large volumes of
                 data. This paper describes Gobblin, a generic data
                 ingestion framework for Hadoop and one of LinkedIn's
                 latest open source products. At LinkedIn we need to
                 ingest data from various sources such as relational
                 stores, NoSQL stores, streaming systems, REST
                 endpoints, filesystems, etc. into our Hadoop clusters.
                 Maintaining independent pipelines for each source can
                 lead to various operational problems. Gobblin aims to
                 solve this issue by providing a centralized data
                 ingestion framework that makes it easy to support
                 ingesting data from a variety of sources. Gobblin
                 distinguishes itself from similar frameworks by
                 focusing on three core principles: generality,
                 extensibility, and operability. Gobblin supports a
                 mixture of data sources out-of-the-box and can be
                 easily extended for more. This enables an organization
                 to use a single framework to handle different data
                 ingestion needs, making it easy and inexpensive to
                 operate. Moreover, with an end-to-end metrics
                 collection and reporting module, Gobblin makes it
                 simple and efficient to identify issues in
                 production.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Das:2015:QOO,
  author =       "Dinesh Das and Jiaqi Yan and Mohamed Zait and
                 Satyanarayana R. Valluri and Nirav Vyas and Ramarajan
                 Krishnamachari and Prashant Gaharwar and Jesse Kamp and
                 Niloy Mukherjee",
  title =        "Query optimization in {Oracle 12c} database
                 in-memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1770--1781",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824074",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional on-disk row major tables have been the
                 dominant storage mechanism in relational databases for
                 decades. Over the last decade, however, with explosive
                 growth in data volume and demand for faster analytics,
                 has come the recognition that a different data
                 representation is needed. There is widespread agreement
                 that in-memory column-oriented databases are best
                 suited to meet the realities of this new world. Oracle
                 12c Database In-memory, the industry's first
                 dual-format database, allows existing row major on-disk
                 tables to have complementary in-memory columnar
                 representations. The new storage format brings new data
                 processing techniques and query execution algorithms
                 and thus new challenges for the query optimizer.
                 Execution plans that are optimal for one format may be
                 sub-optimal for the other. In this paper, we describe
                 the changes made in the query optimizer to generate
                 execution plans optimized for the specific format ---
                 row major or columnar --- that will be scanned during
                 query execution. With enhancements in several areas ---
                 statistics, cost model, query transformation, access
                 path and join optimization, parallelism, and
                 cluster-awareness --- the query optimizer plays a
                 significant role in unlocking the full promise and
                 performance of Oracle Database In-Memory.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Green:2015:LPL,
  author =       "Todd J. Green and Dan Olteanu and Geoffrey Washburn",
  title =        "Live programming in the {LogicBlox} system: a
                 {MetaLogiQL} approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1782--1791",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824075",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The emerging category of self-service enterprise
                 applications motivates support for ``live programming''
                 in the database, where the user's iterative data
                 exploration triggers changes to installed application
                 code and its output in real time. This paper discusses
                 the technical challenges in supporting live programming
                 in the database and presents the solution implemented
                 in the LogicBlox commercial system. The workhorse
                 architectural component is a ``meta-engine'' that
                 incrementally maintains metadata representing
                 application code, guides its compilation into an
                 internal representation in the database kernel, and
                 orchestrates maintenance of materialized views based on
                 those changes. Our approach mirrors LogicBlox's
                 declarative programming model and describes the
                 maintenance of application code using declarative
                 meta-rules; the meta-engine is essentially a
                 ``bootstrap'' version of the database engine proper.
                 Beyond live programming, the meta-engine turns out
                 effective for a range of static analysis and
                 optimization tasks. Outside of the database context, we
                 speculate that our design may even provide a novel
                 means of building incremental compilers for
                 general-purpose programming languages.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Akidau:2015:DMP,
  author =       "Tyler Akidau and Robert Bradshaw and Craig Chambers
                 and Slava Chernyak and Rafael J.
                 Fern{\'a}ndez-Moctezuma and Reuven Lax and Sam McVeety
                 and Daniel Mills and Frances Perry and Eric Schmidt and
                 Sam Whittle",
  title =        "The dataflow model: a practical approach to balancing
                 correctness, latency, and cost in massive-scale,
                 unbounded, out-of-order data processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1792--1803",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824076",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Unbounded, unordered, global-scale datasets are
                 increasingly common in day-to-day business (e.g. Web
                 logs, mobile usage statistics, and sensor networks). At
                 the same time, consumers of these datasets have evolved
                 sophisticated requirements, such as event-time ordering
                 and windowing by features of the data themselves, in
                 addition to an insatiable hunger for faster answers.
                 Meanwhile, practicality dictates that one can never
                 fully optimize along all dimensions of correctness,
                 latency, and cost for these types of input. As a
                 result, data processing practitioners are left with the
                 quandary of how to reconcile the tensions between these
                 seemingly competing propositions, often resulting in
                 disparate implementations and systems. We propose that
                 a fundamental shift of approach is necessary to deal
                 with these evolved requirements in modern data
                 processing. We as a field must stop trying to groom
                 unbounded datasets into finite pools of information
                 that eventually become complete, and instead live and
                 breathe under the assumption that we will never know if
                 or when we have seen all of our data, only that new
                 data will arrive, old data may be retracted, and the
                 only way to make this problem tractable is via
                 principled abstractions that allow the practitioner the
                 choice of appropriate tradeoffs along the axes of
                 interest: correctness, latency, and cost. In this
                 paper, we present one such approach, the Dataflow
                 Model, along with a detailed examination of the
                 semantics it enables, an overview of the core
                 principles that guided its design, and a validation of
                 the model itself via the real-world experiences that
                 led to its development.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ching:2015:OTE,
  author =       "Avery Ching and Sergey Edunov and Maja Kabiljo and
                 Dionysios Logothetis and Sambavi Muthukrishnan",
  title =        "One trillion edges: graph processing at
                 {Facebook}-scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1804--1815",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824077",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analyzing large graphs provides valuable insights for
                 social networking and web companies in content ranking
                 and recommendations. While numerous graph processing
                 systems have been developed and evaluated on available
                 benchmark graphs of up to 6.6B edges, they often face
                 significant difficulties in scaling to much larger
                 graphs. Industry graphs can be two orders of magnitude
                 larger --- hundreds of billions or up to one trillion
                 edges. In addition to scalability challenges, real
                 world applications often require much more complex
                 graph processing workflows than previously evaluated.
                 In this paper, we describe the usability, performance,
                 and scalability improvements we made to Apache Giraph,
                 an open-source graph processing system, in order to use
                 it on Facebook-scale graphs of up to one trillion
                 edges. We also describe several key extensions to the
                 original Pregel model that make it possible to develop
                 a broader range of production graph applications and
                 workflows as well as improve code reuse. Finally, we
                 report on real-world operations as well as performance
                 characteristics of several large-scale production
                 applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pelkonen:2015:GFS,
  author =       "Tuomas Pelkonen and Scott Franklin and Justin Teller
                 and Paul Cavallaro and Qi Huang and Justin Meza and
                 Kaushik Veeraraghavan",
  title =        "{Gorilla}: a fast, scalable, in-memory time series
                 database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1816--1827",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824078",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale internet services aim to remain highly
                 available and responsive in the presence of unexpected
                 failures. Providing this service often requires
                 monitoring and analyzing tens of millions of
                 measurements per second across a large number of
                 systems, and one particularly effective solution is to
                 store and query such measurements in a time series
                 database (TSDB). A key challenge in the design of TSDBs
                 is how to strike the right balance between efficiency,
                 scalability, and reliability. In this paper we
                 introduce Gorilla, Facebook's in-memory TSDB. Our
                 insight is that users of monitoring systems do not
                 place much emphasis on individual data points but
                 rather on aggregate analysis, and recent data points
                 are of much higher value than older points to quickly
                 detect and diagnose the root cause of an ongoing
                 problem. Gorilla optimizes for remaining highly
                 available for writes and reads, even in the face of
                 failures, at the expense of possibly dropping small
                 amounts of data on the write path. To improve query
                 efficiency, we aggressively leverage compression
                 techniques such as delta-of-delta timestamps and XOR'd
                 floating point values to reduce Gorilla's storage
                 footprint by 10x. This allows us to store Gorilla's
                 data in memory, reducing query latency by 73x and
                 improving query throughput by 14x when compared to a
                 traditional database (HBase)-backed time series data.
                 This performance improvement has unlocked new
                 monitoring and debugging tools, such as time series
                 correlation search and more dense visualization tools.
                 Gorilla also gracefully handles failures from a
                 single-node to entire regions with little to no
                 operational overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Potharaju:2015:CLC,
  author =       "Rahul Potharaju and Joseph Chan and Luhui Hu and
                 Cristina Nita-Rotaru and Mingshi Wang and Liyuan Zhang
                 and Navendu Jain",
  title =        "{ConfSeer}: leveraging customer support knowledge
                 bases for automated misconfiguration detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1828--1839",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824079",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce ConfSeer, an automated system that
                 detects potential configuration issues or deviations
                 from identified best practices by leveraging a
                 knowledge base (KB) of technical solutions. The
                 intuition is that these KB articles describe the
                 configuration problems and their fixes so if the system
                 can accurately understand them, it can automatically
                 pinpoint both the errors and their resolution.
                 Unfortunately, finding an accurate match is difficult
                 because (a) the KB articles are written in natural
                 language text, and (b) configuration files typically
                 contain a large number of parameters with a high value
                 range. Thus, expert-driven manual troubleshooting is
                 not scalable. While there are several state-of-the-art
                 techniques proposed for individual tasks such as
                 keyword matching, concept determination and entity
                 resolution, none offer a practical end-to-end solution
                 to detect problems in machine configurations. In this
                 paper, we describe our experiences building ConfSeer
                  using a novel combination of ideas from natural
                 language processing, information retrieval and
                 interactive learning. ConfSeer powers the
                 recommendation engine behind Microsoft Operations
                 Management Suite that proposes fixes for software
                 configuration errors. The system has been running in
                 production for about a year to proactively find
                 misconfigurations on tens of thousands of servers. Our
                 evaluation of ConfSeer against an expert-defined
                 rule-based commercial system, an expert survey and web
                 search engines shows that it achieves 80\%-97.5\%
                 accuracy and incurs low runtime overheads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Armbrust:2015:SSR,
  author =       "Michael Armbrust and Tathagata Das and Aaron Davidson
                 and Ali Ghodsi and Andrew Or and Josh Rosen and Ion
                 Stoica and Patrick Wendell and Reynold Xin and Matei
                 Zaharia",
  title =        "Scaling {Spark} in the real world: performance and
                 usability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1840--1843",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824080",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Apache Spark is one of the most widely used open
                 source processing engines for big data, with rich
                 language-integrated APIs and a wide range of libraries.
                 Over the past two years, our group has worked to deploy
                 Spark to a wide range of organizations through
                 consulting relationships as well as our hosted service,
                 Databricks. We describe the main challenges and
                 requirements that appeared in taking Spark to a wide
                 set of users, and usability and performance
                 improvements we have made to the engine in response.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sahli:2015:SLS,
  author =       "Majed Sahli and Essam Mansour and Panos Kalnis",
  title =        "{StarDB}: a large-scale {DBMS} for strings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1844--1847",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824082",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Strings and applications using them are proliferating
                 in science and business. Currently, strings are stored
                 in file systems and processed using ad-hoc procedural
                 code. Existing techniques are not flexible and cannot
                 efficiently handle complex queries or large datasets.
                 In this paper, we demonstrate StarDB, a distributed
                 database system for analytics on strings. StarDB hides
                 data and system complexities and allows users to focus
                 on analytics. It uses a comprehensive set of parallel
                 string operations and provides a declarative query
                 language to solve complex queries. StarDB automatically
                 tunes itself and runs with over 90\% efficiency on
                 supercomputers, public clouds, clusters, and
                 workstations. We test StarDB using real datasets that
                 are 2 orders of magnitude larger than the datasets
                 reported by previous works.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Harbi:2015:ESQ,
  author =       "Razen Harbi and Ibrahim Abdelaziz and Panos Kalnis and
                 Nikos Mamoulis",
  title =        "Evaluating {SPARQL} queries on massive {RDF}
                 datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1848--1851",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824083",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed RDF systems partition data across multiple
                 computer nodes. Partitioning is typically based on
                 heuristics that minimize inter-node communication and
                 it is performed in an initial, data pre-processing
                 phase. Therefore, the resulting partitions are static
                 and do not adapt to changes in the query workload; as a
                 result, existing systems are unable to consistently
                 avoid communication for queries that are not favored by
                 the initial data partitioning. Furthermore, for very
                 large RDF knowledge bases, the partitioning phase
                 becomes prohibitively expensive, leading to high
                 startup costs. In this paper, we propose AdHash, a
                 distributed RDF system which addresses the shortcomings
                 of previous work. First, AdHash initially applies
                 lightweight hash partitioning, which drastically
                 minimizes the startup cost, while favoring the parallel
                 processing of join patterns on subjects, without any
                 data communication. Using a locality-aware planner,
                 queries that cannot be processed in parallel are
                 evaluated with minimal communication. Second, AdHash
                 monitors the data access patterns and adapts
                 dynamically to the query load by incrementally
                 redistributing and replicating frequently accessed
                 data. As a result, the communication cost for future
                 queries is drastically reduced or even eliminated. Our
                 experiments with synthetic and real data verify that
                 AdHash (i) starts faster than all existing systems,
                 (ii) processes thousands of queries before other
                 systems become online, and (iii) gracefully adapts to
                 the query load, being able to evaluate queries on
                 billion-scale RDF data in sub-seconds. In this
                 demonstration, audience can use a graphical interface
                 of AdHash to verify its performance superiority
                 compared to state-of-the-art distributed RDF systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kou:2015:TBR,
  author =       "Ngai Meng Kou and Leong Hou U. and Nikos Mamoulis and
                 Yuhong Li and Ye Li and Zhiguo Gong",
  title =        "A topic-based reviewer assignment system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1852--1855",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824084",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Peer reviewing is a widely accepted mechanism for
                 assessing the quality of submitted articles to
                 scientific conferences or journals. Conference
                 management systems (CMS) are used by conference
                 organizers to invite appropriate reviewers and assign
                 them to submitted papers. Typical CMS rely on paper
                 bids entered by the reviewers and apply simple matching
                 algorithms to compute the paper assignment. In this
                 paper, we demonstrate our Reviewer Assignment System
                 (RAS), which has advanced features compared to broadly
                 used CMSs. First, RAS automatically extracts the
                 profiles of reviewers and submissions in the form of
                 topic vectors. These profiles can be used to
                 automatically assign reviewers to papers without
                 relying on a bidding process, which can be tedious and
                 error-prone. Second, besides supporting classic
                 assignment models (e.g., stable marriage and optimal
                 assignment), RAS includes a recently published
                 assignment model by our research group, which
                 maximizes, for each paper, the coverage of its topics
                 by the profiles of its reviewers. The features of the
                 demonstration include (1) automatic extraction of paper
                 and reviewer profiles, (2) assignment computation by
                 different models, and (3) visualization of the results
                 by different models, in order to assess their
                 effectiveness.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liroz-Gistau:2015:FHE,
  author =       "Miguel Liroz-Gistau and Reza Akbarinia and Patrick
                 Valduriez",
  title =        "{FP-Hadoop}: efficient execution of parallel jobs
                 over skewed data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1856--1859",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824085",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big data parallel frameworks, such as MapReduce or
                 Spark have been praised for their high scalability and
                 performance, but show poor performance in the case of
                 data skew. There are important cases where a high
                 percentage of processing in the reduce side ends up
                 being done by only one node. In this demonstration, we
                 illustrate the use of FP-Hadoop, a system that
                 efficiently deals with data skew in MapReduce jobs. In
                 FP-Hadoop, there is a new phase, called intermediate
                 reduce (IR), in which blocks of intermediate values,
                 constructed dynamically, are processed by intermediate
                 reduce workers in parallel, by using a scheduling
                 strategy. Within the IR phase, even if all intermediate
                 values belong to only one key, the main part of the
                 reducing work can be done in parallel using the
                 computing resources of all available workers. We
                 implemented a prototype of FP-Hadoop, and conducted
                 extensive experiments over synthetic and real datasets.
                 We achieve excellent performance gains compared to
                 native Hadoop, e.g. more than 10 times in reduce time
                 and 5 times in total execution time. During our
                 demonstration, we give the users the possibility to
                 execute and compare job executions in FP-Hadoop and
                 Hadoop. They can retrieve general information about the
                 job and the tasks and a summary of the phases. They can
                 also visually compare different configurations to
                 explore the difference between the approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papenbrock:2015:DPM,
  author =       "Thorsten Papenbrock and Tanja Bergmann and Moritz
                 Finke and Jakob Zwiener and Felix Naumann",
  title =        "Data profiling with {Metanome}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1860--1863",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824086",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data profiling is the discipline of discovering
                 metadata about given datasets. The metadata itself
                 serve a variety of use cases, such as data integration,
                 data cleansing, or query optimization. Due to the
                 importance of data profiling in practice, many tools
                 have emerged that support data scientists and IT
                 professionals in this task. These tools provide good
                 support for profiling statistics that are easy to
                 compute, but they are usually lacking automatic and
                 efficient discovery of complex statistics, such as
                 inclusion dependencies, unique column combinations, or
                 functional dependencies. We present Metanome, an
                 extensible profiling platform that incorporates many
                 state-of-the-art profiling algorithms. While Metanome
                 is able to calculate simple profiling statistics in
                 relational data, its focus lies on the automatic
                 discovery of complex metadata. Metanome's goal is to
                 provide novel profiling algorithms from research,
                 perform comparative evaluations, and to support
                 developers in building and testing new algorithms. In
                 addition, Metanome is able to rank profiling results
                 according to various metrics and to visualize the, at
                 times, large metadata sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kumar:2015:DSO,
  author =       "Arun Kumar and Mona Jalal and Boqun Yan and Jeffrey
                 Naughton and Jignesh M. Patel",
  title =        "Demonstration of {Santoku}: optimizing machine
                 learning over normalized data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1864--1867",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824087",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Advanced analytics is a booming area in the data
                 management industry and a hot research topic. Almost
                 all toolkits that implement machine learning (ML)
                 algorithms assume that the input is a single table, but
                 most relational datasets are not stored as single
                 tables due to normalization. Thus, analysts often join
                 tables to obtain a denormalized table. Also, analysts
                 typically ignore any functional dependencies among
                 features because ML toolkits do not support them. In
                 both cases, time is wasted in learning over data with
                 redundancy. We demonstrate Santoku, a toolkit to help
                 analysts improve the performance of ML over normalized
                 data. Santoku applies the idea of factorized learning
                 and automatically decides whether to denormalize or
                 push ML computations through joins. Santoku also
                 exploits database dependencies to provide automatic
                 insights that could help analysts with exploratory
                 feature selection. It is usable as a library in R,
                 which is a popular environment for advanced analytics.
                 We demonstrate the benefits of Santoku in improving ML
                 performance and helping analysts with feature
                 selection.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Seah:2015:PCP,
  author =       "Boon Siew Seah and Sourav S. Bhowmick and Aixin Sun",
  title =        "{PRISM}: concept-preserving summarization of top-$k$
                 social image search results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1868--1871",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824088",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Most existing tag-based social image search engines
                 present search results as a ranked list of images,
                 which cannot be consumed by users in a natural and
                 intuitive manner. In this demonstration, we present a
                 novel concept-preserving image search results
                 summarization system called prism. prism exploits both
                 visual features and tags of the search results to
                 generate high quality summary, which not only breaks
                 the results into visually and semantically coherent
                 clusters but it also maximizes the coverage of the
                 original top-$k$ search results. It first constructs a
                 visual similarity graph where the nodes are images in
                 the top-$k$ search results and the edges represent
                 visual similarities between pairs of images. This graph
                 is optimally decomposed and compressed into a set of
                 concept-preserving subgraphs based on a set of
                 summarization criteria. One or more exemplar images
                 from each subgraph is selected to form the exemplar
                 summary of the result set. We demonstrate various
                 innovative features of prism and the promise of
                 superior quality summary construction of social image
                 search results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Muller:2015:PST,
  author =       "Tobias M{\"u}ller and Torsten Grust",
  title =        "Provenance for {SQL} through abstract interpretation:
                 value-less, but worthwhile",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1872--1875",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824089",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate the derivation of fine-grained where-
                 and why-provenance for a rich dialect of SQL that
                 includes recursion, (correlated) subqueries, windows,
                 grouping/aggregation, and the RDBMS's library of
                 built-in functions. The approach relies on ideas that
                 originate in the programming language
                 community---program slicing and abstract
                 interpretation, in particular. A two-stage process
                 first records a query's control flow decisions and
                 locations of data access before it derives provenance
                 without consultation of the actual data values
                 (rendering the method largely ``value-less''). We will
                 bring an interactive demonstrator that uses this
                 provenance information to make input/output
                 dependencies in real-world SQL queries tangible.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2015:SSQ,
  author =       "Zhian He and Wai Kit Wong and Ben Kao and David Wai
                 Lok Cheung and Rongbin Li and Siu Ming Yiu and Eric
                 Lo",
  title =        "{SDB}: a secure query processing system with data
                 interoperability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1876--1879",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824090",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We address security issues in a cloud database system
                 which employs the DBaaS model --- a data owner (DO)
                 exports data to a cloud database service provider (SP).
                 To provide data security, sensitive data is encrypted
                 by the DO before it is uploaded to the SP. Compared to
                 existing secure query processing systems like CryptDB
                 [7] and MONOMI [8], in which data operations (e.g.,
                 comparison or addition) are supported by specialized
                 encryption schemes, our demo system, SDB, is
                 implemented based on a set of data-interoperable secure
                 operators, i.e., the output of an operator can be used
                 as input of another operator. As a result, SDB can
                 support a wide range of complex queries (e.g., all
                 TPC-H queries) efficiently. In this demonstration, we
                 show how our SDB prototype supports secure query
                 processing on complex workload like TPC-H. We also
                 demonstrate how our system protects sensitive
                 information from malicious attackers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abdelaziz:2015:SVC,
  author =       "Ibrahim Abdelaziz and Razen Harbi and Semih Salihoglu
                 and Panos Kalnis and Nikos Mamoulis",
  title =        "{SPARTex}: a vertex-centric framework for {RDF} data
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1880--1883",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824091",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A growing number of applications require combining
                 SPARQL queries with generic graph search on RDF data.
                 However, the lack of procedural capabilities in SPARQL
                 makes it inappropriate for graph analytics. Moreover,
                 RDF engines focus on SPARQL query evaluation whereas
                 graph management frameworks perform only generic graph
                 computations. In this work, we bridge the gap by
                 introducing SPARTex, an RDF analytics framework based
                 on the vertex-centric computation model. In SPARTex,
                 user-defined vertex centric programs can be invoked
                 from SPARQL as stored procedures. SPARTex allows the
                 execution of a pipeline of graph algorithms without the
                 need for multiple reads/writes of input data and
                 intermediate results. We use a cost-based optimizer for
                 minimizing the communication cost. SPARTex evaluates
                 queries that combine SPARQL and generic graph
                 computations orders of magnitude faster than existing
                 RDF engines. We demonstrate a real system prototype of
                 SPARTex running on a local cluster using real and
                 synthetic datasets. SPARTex has a real-time graphical
                 user interface that allows the participants to write
                 regular SPARQL queries, use our proposed SPARQL
                 extension to declaratively invoke graph algorithms or
                 combine/pipeline both SPARQL querying and generic graph
                 analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2015:IDG,
  author =       "Lu Chen and Yunjun Gao and Zhihao Xing and Christian
                 S. Jensen and Gang Chen",
  title =        "{I2RS}: a distributed geo-textual image retrieval and
                 recommendation system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1884--1887",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824092",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Massive amounts of geo-tagged and textually annotated
                 images are provided by online photo services such as
                 Flickr and Zommr. However, most existing image
                 retrieval engines only consider text annotations. We
                 present I2RS, a system that allows users to view
                 geo-textual images on Google Maps, find hot topics
                 within a specific geographic region and time period,
                 retrieve images similar to a query image, and receive
                 recommended images that they might be interested in.
                 I2RS is a distributed geo-textual image retrieval and
                 recommendation system that employs SPB-trees to index
                 geo-textual images, and that utilizes metric similarity
                 queries, including top-$m$ spatio-temporal range and $k$
                 nearest neighbor queries, to support geo-textual image
                 retrieval and recommendation. The system adopts the
                 browser-server model, whereas the server is deployed in
                 a distributed environment that enables efficiency and
                 scalability to huge amounts of data and requests. A
                 rich set of 100 million geo-textual images crawled from
                 Flickr is used to demonstrate that, I2RS can return
                 high-quality answers in an interactive way and support
                 efficient updates for high image arrival rates.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bursztyn:2015:RBQ,
  author =       "Damian Bursztyn and Fran{\c{c}}ois Goasdou{\'e} and
                 Ioana Manolescu",
  title =        "Reformulation-based query answering in {RDF}:
                 alternatives and performance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1888--1891",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824093",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Answering queries over Semantic Web data, i.e., RDF
                 graphs, must account for both explicit data and
                 implicit data, entailed by the explicit data and the
                 semantic constraints holding on them. Two main query
                 answering techniques have been devised, namely
                 Saturation-based (Sat) which precomputes and adds to
                 the graph all implicit information, and
                 Reformulation-based (Ref) which reformulates the
                 query based on the
                 graph constraints, so that evaluating the reformulated
                 query directly against the explicit data (i.e., without
                 considering the constraints) produces the query answer.
                 While Sat is well known, Ref has received less
                 attention so far. In particular, reformulated queries
                 often perform poorly if the query is complex. Our
                 demonstration showcases a large set of Ref techniques,
                 including but not limited to one we proposed recently.
                 The audience will be able to 1: test them against
                 different datasets, constraints and queries, as well as
                 different well-established systems, 2: analyze and
                 understand the performance challenges they raise, and
                 3: alter the scenarios to visualize the impact on
                 performance. In particular, we show how a cost-based
                 Ref approach allows avoiding reformulation performance
                 pitfalls.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bux:2015:SSS,
  author =       "Marc Bux and J{\"o}rgen Brandt and Carsten Lipka and
                 Kamal Hakimzadeh and Jim Dowling and Ulf Leser",
  title =        "{SAASFEE}: scalable scientific workflow execution
                 engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1892--1895",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824094",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Across many fields of science, primary data sets like
                 sensor read-outs, time series, and genomic sequences
                 are analyzed by complex chains of specialized tools and
                 scripts exchanging intermediate results in
                 domain-specific file formats. Scientific workflow
                 management systems (SWfMSs) support the development and
                 execution of these tool chains by providing workflow
                 specification languages, graphical editors,
                 fault-tolerant execution engines, etc. However, many
                 SWfMSs are not prepared to handle large data sets
                 because of inadequate support for distributed
                 computing. On the other hand, most SWfMSs that do
                 support distributed computing only allow static task
                 execution orders. We present SAASFEE, a SWfMS which
                 runs arbitrarily complex workflows on Hadoop YARN.
                 Workflows are specified in Cuneiform, a functional
                 workflow language focusing on parallelization and easy
                 integration of existing software. Cuneiform workflows
                 are executed on Hi-WAY, a higher-level scheduler for
                 running workflows on YARN. Distinct features of SAASFEE
                 are the ability to execute iterative workflows, an
                 adaptive task scheduler, re-executable provenance
                 traces, and compatibility to selected other workflow
                 systems. In the demonstration, we present all
                 components of SAASFEE using real-life workflows from
                 the field of genomics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eldawy:2015:DHE,
  author =       "Ahmed Eldawy and Mohamed F. Mokbel and Christopher
                 Jonathan",
  title =        "A demonstration of {HadoopViz}: an extensible
                 {MapReduce} system for visualizing big spatial data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1896--1899",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824095",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This demonstration presents HadoopViz; an extensible
                 MapReduce-based system for visualizing Big Spatial
                 Data. HadoopViz has two main unique features that
                 distinguish it from other techniques. (1) It provides
                 an extensible interface that allows users to visualize
                 various types of data by defining five abstract
                 functions, without delving into the details of the
                 MapReduce algorithms. We show how it is used to create
                 four types of visualizations, namely, scatter plot,
                 road network, frequency heat map, and temperature heat
                 map. (2) HadoopViz is capable of generating big images
                 with giga-pixel resolution by employing a three-phase
                 approach of partitioning, rasterize, and merging.
                 HadoopViz generates single and multi-level images,
                 where the latter allows users to zoom in/out to get
                 more/less details. Both types of images are generated
                 with a very high resolution using the extensible and
                 scalable framework of HadoopViz.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Bergman:2015:QQO,
  author =       {Moria Bergman and Tova Milo and Slava Novgorodov and
                 Wang-Chiew Tan},
  title =        {{QOCO}: a query oriented data cleaning system with
                 oracles},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1900--1903},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824096},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {As key decisions are often made based on information
                 contained in a database, it is important for the
                 database to be as complete and correct as possible. For
                 this reason, many data cleaning tools have been
                 developed to automatically resolve inconsistencies in
                 databases. However, data cleaning tools provide only
                 best-effort results and usually cannot eradicate all
                 errors that may exist in a database. Even more
                 importantly, existing data cleaning tools do not
                 typically address the problem of determining what
                 information is missing from a database. To tackle these
                 problems, we present QOCO, a novel query oriented
                 cleaning system that leverages materialized views that
                 are defined by user queries as a trigger for
                 identifying the remaining incorrect/missing
                 information. Given a user query, QOCO interacts with
                 domain experts (which we model as oracle crowds) to
                 identify potentially wrong or missing answers in the
                 result of the user query, as well as determine and
                 correct the wrong data that is the cause for the
                 error(s). We will demonstrate QOCO over a World Cup
                 Games database, and illustrate the interaction between
                 QOCO and the oracles. Our demo audience will play the
                 role of oracles, and we show how QOCO's underlying
                 operations and optimization mechanisms can effectively
                 prune the search space and minimize the number of
                 questions that need to be posed to accelerate the
                 cleaning process.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Ying:2015:TFS,
  author =       "Shanshan Ying and Flip Korn and Barna Saha and Divesh
                 Srivastava",
  title =        "{TreeScope}: finding structural anomalies in
                 semi-structured data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1904--1907",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824097",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Semi-structured data are prevalent on the web, with
                 formats such as XML and JSON soaring in popularity due
                 to their generality, flexibility and easy
                 customization. However, these very same features make
                 semi-structured data prone to a range of data quality
                 errors, from errors in content to errors in structure.
                 While the former has been well studied, little
                 attention has been paid to structural errors. In this
                 demonstration, we present TreeScope, which analyzes
                 semi-structured data sets with the goal of
                 automatically identifying structural anomalies from the
                 data. Our techniques learn robust structural models
                 that have high support, to identify potential errors in
                 the structure. Identified structural anomalies are then
                 concisely summarized to provide plausible explanations
                 of the potential errors. The goal of this demonstration
                 is to enable an interactive exploration of the process
                 of identifying and summarizing structural anomalies in
                 semi-structured data sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elmore:2015:DBP,
  author =       "Aaron J. Elmore and Jennie Duggan and Michael
                 Stonebraker and Magdalena Balazinska and Ugur
                 Cetintemel and Vijay Gadepally and Jeffrey Heer and
                 Bill Howe and Jeremy Kepner and Tim Kraska and Sam
                 Madden and David Maier and Tim Mattson and Stavros
                 Papadopoulos and Jeff Parkhurst and Nesime Tatbul and
                 Manasi Vartak and Stan Zdonik",
  title =        "A demonstration of the {BigDAWG} polystore system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1908--1911",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824098",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents BigDAWG, a reference
                 implementation of a new architecture for ``Big Data''
                 applications. Such applications not only call for
                 large-scale analytics, but also for real-time streaming
                 support, smaller analytics at interactive speeds, data
                 visualization, and cross-storage-system queries. Guided
                 by the principle that ``one size does not fit all'', we
                 build on top of a variety of storage engines, each
                 designed for a specialized use case. To illustrate the
                 promise of this approach, we demonstrate its
                 effectiveness on a hospital application using data from
                 an intensive care unit (ICU). This complex application
                 serves the needs of doctors and researchers and
                 provides real-time support for streams of patient data.
                 It showcases novel approaches for querying across
                 multiple storage engines, data visualization, and
                 scalable real-time analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zoumpatianos:2015:RID,
  author =       "Kostas Zoumpatianos and Stratos Idreos and Themis
                 Palpanas",
  title =        "{RINSE}: interactive data series exploration with
                 {ADS+}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1912--1915",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824099",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Numerous applications continuously produce big amounts
                 of data series, and in several time critical scenarios
                 analysts need to be able to query these data as soon as
                 they become available. An adaptive index data
                 structure, ADS+, which is specifically tailored to
                 solve the problem of indexing and querying very large
                 data series collections has been recently proposed as a
                 solution to this problem. The main idea is that instead
                 of building the complete index over the complete data
                 set up-front and querying only later, we interactively
                 and adaptively build parts of the index, only for the
                 parts of the data on which the users pose queries. The
                 net effect is that instead of waiting for extended
                 periods of time for the index creation, users can
                 immediately start exploring the data series. In this
                 work, we present a demonstration of ADS+; we introduce
                 RINSE, a system that allows users to experience the
                 benefits of the ADS+ adaptive index through an
                 intuitive web interface. Users can explore large
                 datasets and find patterns of interest, using nearest
                 neighbor search. They can draw queries (data series)
                 using a mouse, or touch screen, or they can select from
                 a predefined list of data series. RINSE can scale to
                 large data sizes, while drastically reducing the data
                 to query delay: by the time state-of-the-art indexing
                 techniques finish indexing 1 billion data series (and
                 before answering even a single query), adaptive data
                 series indexing can already answer $3 \times 10^5$
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Bhardwaj:2015:CDA,
  author =       {Anant Bhardwaj and Amol Deshpande and Aaron J. Elmore
                 and David Karger and Sam Madden and Aditya Parameswaran
                 and Harihar Subramanyam and Eugene Wu and Rebecca
                 Zhang},
  title =        {Collaborative data analytics with {DataHub}},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1916--1919},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824100},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {While there have been many solutions proposed for
                 storing and analyzing large volumes of data, all of
                 these solutions have limited support for collaborative
                 data analytics, especially given the many individuals
                 and teams are simultaneously analyzing, modifying and
                 exchanging datasets, employing a number of
                 heterogeneous tools or languages for data analysis, and
                 writing scripts to clean, preprocess, or query data. We
                 demonstrate DataHub, a unified platform with the
                 ability to load, store, query, collaboratively analyze,
                 interactively visualize, interface with external
                 applications, and share datasets. We will demonstrate
                 the following aspects of the DataHub platform: (a)
                 flexible data storage, sharing, and native versioning
                 capabilities: multiple conference attendees can
                 concurrently update the database and browse the
                 different versions and inspect conflicts; (b) an app
                 ecosystem that hosts apps for various data-processing
                 activities: conference attendees will be able to
                 effortlessly ingest, query, and visualize data using
                 our existing apps; (c) thrift-based data serialization
                 permits data analysis in any combination of 20+
                 languages, with DataHub as the common data store:
                 conference attendees will be able to analyze datasets
                 in R, Python, and Matlab, while the inputs and the
                 results are still stored in DataHub. In particular,
                 conference attendees will be able to use the DataHub
                 notebook ---an IPython-based notebook for analyzing
                 data and storing the results of data analysis.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Shin:2015:MDD,
  author =       {Jaeho Shin and Christopher R{\'e} and Michael
                 Cafarella},
  title =        {{Mindtagger}: a demonstration of data labeling in
                 knowledge base construction},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1920--1923},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824101},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {End-to-end knowledge base construction systems using
                 statistical inference are enabling more people to
                 automatically extract high-quality domain-specific
                 information from unstructured data. As a result of
                 deploying DeepDive framework across several domains, we
                 found new challenges in debugging and improving such
                 end-to-end systems to construct high-quality knowledge
                 bases. DeepDive has an iterative development cycle in
                 which users improve the data. To help our users, we
                 needed to develop principles for analyzing the system's
                 error as well as provide tooling for inspecting and
                 labeling various data products of the system. We
                 created guidelines for error analysis modeled after our
                 colleagues' best practices, in which data labeling
                 plays a critical role in every step of the analysis. To
                 enable more productive and systematic data labeling, we
                 created Mindtagger, a versatile tool that can be
                 configured to support a wide range of tasks. In this
                 demonstration, we show in detail what data labeling
                 tasks are modeled in our error analysis guidelines and
                 how each of them is performed using Mindtagger.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Koutra:2015:PIL,
  author =       "Danai Koutra and Di Jin and Yuanchi Ning and Christos
                 Faloutsos",
  title =        "{Perseus}: an interactive large-scale graph mining and
                 visualization tool",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1924--1927",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824102",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a large graph with several millions or billions
                 of nodes and edges, such as a social network, how can
                 we explore it efficiently and find out what is in the
                 data? In this demo we present Perseus, a large-scale
                 system that enables the comprehensive analysis of large
                 graphs by supporting the coupled summarization of graph
                 properties and structures, guiding attention to
                 outliers, and allowing the user to interactively
                 explore normal and anomalous node behaviors.
                 Specifically, Perseus provides for the following
                 operations: (1) It automatically extracts graph
                 invariants (e.g., degree, PageRank, real eigenvectors)
                 by performing scalable, offline batch processing on
                 Hadoop; (2) It interactively visualizes univariate and
                 bivariate distributions for those invariants; (3) It
                 summarizes the properties of the nodes that the user
                 selects; (4) It efficiently visualizes the induced
                 subgraph of a selected node and its neighbors, by
                 incrementally revealing its neighbors. In our
                 demonstration, we invite the audience to interact with
                 Perseus to explore a variety of multi-million-edge
                 social networks including a Wikipedia vote network, a
                 friendship/foeship network in Slashdot, and a trust
                 network based on the consumer review website
                 Epinions.com.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Joglekar:2015:SDN,
  author =       {Manas Joglekar and Hector Garcia-Molina and Aditya
                 Parameswaran},
  title =        {Smart drill-down: a new data exploration operator},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1928--1931},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824103},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We present a data exploration system equipped with
                 smart drill-down, a novel operator for interactively
                 exploring a relational table to discover and summarize
                 ``interesting'' groups of tuples. Each such group of
                 tuples is represented by a rule. For instance, the rule
                 (a, b, *, 1000) tells us that there are a thousand
                 tuples with value a in the first column and b in the
                 second column (and any value in the third column).
                 Smart drill-down presents an analyst with a list of
                 rules that together describe interesting aspects of the
                 table. The analyst can tailor the definition of
                 interesting, and can interactively apply smart
                 drill-down on an existing rule to explore that part of
                 the table. In the demonstration, conference attendees
                 will be able to use the data exploration system
                 equipped with smart drill-down, and will be able to
                 contrast smart drill-down to traditional drill-down,
                 for various interestingness measures, and resource
                 constraints.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Dyreson:2015:VED,
  author =       {Curtis E. Dyreson and Sourav S. Bhowmick and Ryan
                 Grapp},
  title =        {Virtual {eXist-db}: liberating hierarchical queries
                 from the shackles of access path dependence},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1932--1935},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824104},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {XQuery programs can be hard to write and port to new
                 data collections because the path expressions in a
                 query are dependent on the hierarchy of the data. We
                 propose to demonstrate a system to liberate query
                 writers from this dependence. A plug-and-play query
                 contains a specification of what data the query needs
                 in order to evaluate. We implemented virtual eXist-db
                 to support plug-and-play XQuery queries. Our system
                 adds a virtualDoc function that lets a programmer
                 sketch the hierarchy needed by the query, which may
                 well be different than what the data has, and logically
                 (not physically) transforms the data (with information
                 loss guarantees) to the hierarchy specified by the
                 virtualDoc. The demonstration will consist of a
                 sequence of XQuery queries using a virtual hierarchy,
                 including queries suggested by the audience. We will
                 also demonstrate a GUI tool to construct a virtual
                 hierarchy.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Cortez:2015:ADS,
  author =       {Eli Cortez and Philip A. Bernstein and Yeye He and Lev
                 Novik},
  title =        {Annotating database schemas to help enterprise
                 search},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1936--1939},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824105},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {In large enterprises, data discovery is a common
                 problem faced by users who need to find relevant
                 information in relational databases. In this scenario,
                 schema annotation is a useful tool to enrich a database
                 schema with descriptive keywords. In this paper, we
                 demonstrate Barcelos, a system that automatically
                 annotates corporate databases. Unlike existing
                 annotation approaches that use Web oriented knowledge
                 bases, Barcelos mines enterprise spreadsheets to find
                 candidate annotations. Our experimental evaluation
                 shows that Barcelos produces high quality annotations;
                 the top-5 have an average precision of 87\%.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Jayaram:2015:VAS,
  author =       {Nandish Jayaram and Sidharth Goyal and Chengkai Li},
  title =        {{VIIQ}: auto-suggestion enabled visual interface for
                 interactive graph query formulation},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1940--1943},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824106},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We present VIIQ (pronounced as wick), an interactive
                 and iterative visual query formulation interface that
                 helps users construct query graphs specifying their
                 exact query intent. Heterogeneous graphs are
                 increasingly used to represent complex relationships in
                 schemaless data, which are usually queried using query
                 graphs. Existing graph query systems offer little help
                 to users in easily choosing the exact labels of the
                 edges and vertices in the query graph. VIIQ helps users
                 easily specify their exact query intent by providing a
                 visual interface that lets them graphically add various
                 query graph components, backed by an edge suggestion
                 mechanism that suggests edges relevant to the user's
                 query intent. In this demo we present: (1) a detailed
                 description of the various features and user-friendly
                 graphical interface of VIIQ, (2) a brief description of
                 the edge suggestion algorithm, and (3) a demonstration
                 scenario that we intend to show the audience.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Liu:2015:FSS,
  author =       {Qingyuan Liu and Eduard C. Dragut and Arjun Mukherjee
                 and Weiyi Meng},
  title =        {{FLORIN}: a system to support (near) real-time
                 applications on user generated content on daily news},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {8},
  number =       {12},
  pages =        {1944--1947},
  month =        aug,
  year =         {2015},
  coden =        {????},
  doi =          {https://doi.org/10.14778/2824032.2824107},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Wed Sep 16 18:23:11 MDT 2015},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {In this paper, we propose a system, FLORIN, which
                 provides support for near real-time applications on
                 user generated content on daily news. FLORIN
                 continuously crawls news outlets for articles and user
                 comments accompanying them. It attaches the articles
                 and comments to daily event stories. It identifies the
                 opinionated content in user comments and performs named
                 entity recognition on news articles. All these pieces
                 of information are organized hierarchically and
                 exportable to other applications. Multiple applications
                 can be built on this data. We have implemented a
                 sentiment analysis system that runs on top of it.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Li:2015:VVI,
  author =       "Yunyao Li and Elmer Kim and Marc A. Touchette and
                 Ramiya Venkatachalam and Hao Wang",
  title =        "{VINERy}: a visual {IDE} for information extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1948--1951",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824108",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Information Extraction (IE) is the key technology
                 enabling analytics over unstructured and
                 semi-structured data. Not surprisingly, it is becoming
                 a critical building block for a wide range of emerging
                 applications. To satisfy the rising demands for
                 information extraction in real-world applications, it
                 is crucial to lower the barrier to entry for IE
                 development and enable users with general computer
                 science background to develop higher quality
                 extractors. In this demonstration, we present
                 VINERy, an intuitive yet expressive visual IDE for
                 information extraction. We show how it supports the
                 full cycle of IE development without requiring a single
                 line of code and enables a wide range of users to
                 develop high quality IE extractors with minimal
                 efforts. The extractors visually built in VINERY are
                 automatically translated into semantically equivalent
                 extractors in a state-of-the-art declarative language
                 for IE. We also demonstrate how the auto-generated
                 extractors can then be imported into a conventional
                 Eclipse-based IDE for further enhancement. The results
                 of our user studies indicate that VINERY is a
                 significant step forward in facilitating extractor
                 development for both expert and novice IE developers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chu:2015:KRD,
  author =       "Xu Chu and John Morcos and Ihab F. Ilyas and Mourad
                 Ouzzani and Paolo Papotti and Nan Tang and Yin Ye",
  title =        "{KATARA}: reliable data cleaning with knowledge bases
                 and crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1952--1955",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824109",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data cleaning with guaranteed reliability is hard to
                 achieve without accessing external sources, since the
                 truth is not necessarily discoverable from the data at
                 hand. Furthermore, even in the presence of external
                 sources, mainly knowledge bases and humans, effectively
                 leveraging them still faces many challenges, such as
                 aligning heterogeneous data sources and decomposing a
                 complex task into simpler units that can be consumed by
                 humans. We present Katara, a novel end-to-end data
                 cleaning system powered by knowledge bases and
                 crowdsourcing. Given a table, a kb, and a crowd, Katara
                 (i) interprets the table semantics w.r.t. the given kb;
                 (ii) identifies correct and wrong data; and (iii)
                 generates top-$k$ possible repairs for the wrong data.
                 Users will have the opportunity to experience the
                 following features of Katara: (1) Easy specification:
                 Users can define a Katara job with a browser-based
                 specification; (2) Pattern validation: Users can help
                 the system to resolve the ambiguity of different table
                 patterns (i.e., table semantics) discovered by Katara;
                 (3) Data annotation: Users can play the role of
                 internal crowd workers, helping Katara annotate data.
                 Moreover, Katara will visualize the annotated data as
                 correct data validated by the kb, correct data jointly
                 validated by the kb and the crowd, or erroneous tuples
                 along with their possible repairs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alvanaki:2015:GNB,
  author =       "Foteini Alvanaki and Romulo Goncalves and Milena
                 Ivanova and Martin Kersten and Kostis Kyzirakos",
  title =        "{GIS} navigation boosted by column stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1956--1959",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824110",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Earth observation sciences, astronomy, and seismology
                 have large data sets which have inherently rich spatial
                 and geospatial information. In combination with large
                 collections of semantically rich objects which have a
                 large number of thematic properties, they form a new
                 source of knowledge for urban planning, smart cities
                 and natural resource management. Modeling and storing
                 these properties indicating the relationships between
                 them is best handled in a relational database.
                 Furthermore, the scalability requirements posed by the
                 latest 26-attribute light detection and ranging (LIDAR)
                 data sets are a challenge for file-based solutions. In
                 this demo we show how to query a 640 billion point data
                 set using a column store enriched with GIS
                 functionality. Through a lightweight and cache
                 conscious secondary index called Imprints, spatial
                 queries performance on a flat table storage is
                 comparable to traditional file-based solutions. All the
                 results are visualised in real time using QGIS.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arocena:2015:GCY,
  author =       "Patricia C. Arocena and Radu Ciucanu and Boris Glavic
                 and Ren{\'e}e J. Miller",
  title =        "Gain control over your integration evaluations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1960--1963",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824111",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Integration systems are typically evaluated using a
                 few real-world scenarios (e.g., bibliographical or
                 biological datasets) or using synthetic scenarios
                 (e.g., based on star-schemas or other patterns for
                 schemas and constraints). Reusing such evaluations is a
                 cumbersome task because their focus is usually limited
                 to showcasing a specific feature of an approach. This
                 makes it difficult to compare integration solutions,
                 understand their generality, and understand their
                 performance for different application scenarios. Based
                 on this observation, we demonstrate some of the
                 requirements for developing integration benchmarks. We
                 argue that the major abstractions used for integration
                 problems have converged in the last decade which
                 enables the application of robust empirical methods to
                 integration problems (from schema evolution, to data
                 exchange, to answering queries using views and many
                 more). Specifically, we demonstrate that schema
                 mappings are the main abstraction that now drives most
                 integration solutions and show how a metadata generator
                 can be used to create more credible evaluations of the
                 performance and scalability of data integration
                 systems. We will use the demonstration to evangelize
                 for more robust, shared empirical evaluations of data
                 integration systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Diao:2015:AAU,
  author =       "Yanlei Diao and Kyriaki Dimitriadou and Zhan Li and
                 Wenzhao Liu and Olga Papaemmanouil and Kemi Peng and
                 Liping Peng",
  title =        "{AIDE}: an automatic user navigation system for
                 interactive data exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1964--1967",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824112",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data analysts often engage in data exploration tasks
                 to discover interesting data patterns, without knowing
                 exactly what they are looking for. Such exploration
                 tasks can be very labor-intensive because they often
                 require the user to review many results of ad-hoc
                 queries and adjust the predicates of subsequent queries
                 to balance the tradeoff between collecting all
                 interesting information and reducing the size of
                 returned data. In this demonstration we introduce AIDE,
                 a system that automates these exploration tasks. AIDE
                 steers the user towards interesting data areas based on
                 her relevance feedback on database samples, aiming to
                 achieve the goal of identifying all database objects
                 that match the user interest with high efficiency. In
                 our demonstration, conference attendees will see AIDE
                 in action for a variety of exploration tasks on
                 real-world datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aly:2015:DAA,
  author =       "Ahmed M. Aly and Ahmed S. Abdelhamid and Ahmed R.
                 Mahmood and Walid G. Aref and Mohamed S. Hassan and
                 Hazem Elmeleegy and Mourad Ouzzani",
  title =        "A demonstration of {AQWA}: adaptive
                 query-workload-aware partitioning of big spatial data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1968--1971",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824113",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ubiquity of location-aware devices, e.g.,
                 smartphones and GPS devices, has led to a plethora of
                 location-based services in which huge amounts of
                 geotagged information need to be efficiently processed
                 by large-scale computing clusters. This demo presents
                 AQWA, an adaptive and query-workload-aware data
                 partitioning mechanism for processing large-scale
                 spatial data. Unlike existing cluster-based systems,
                 e.g., SpatialHadoop, that apply static partitioning of
                 spatial data, AQWA has the ability to react to changes
                 in the query-workload and data distribution. A key
                 feature of AQWA is that it does not assume prior
                 knowledge of the query-workload or data distribution.
                 Instead, AQWA reacts to changes in both the data and
                 the query-workload by incrementally updating the
                 partitioning of the data. We demonstrate two prototypes
                 of AQWA deployed over Hadoop and Spark. In both
                 prototypes, we process spatial range and
                 k-nearest-neighbor (kNN, for short) queries over
                 large-scale spatial datasets, and we exploit the
                 performance of AQWA under different query-workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dittrich:2015:JID,
  author =       "Jens Dittrich and Patrick Bender",
  title =        "Janiform intra-document analytics for reproducible
                 research",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1972--1975",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824114",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Peer-reviewed publication of research papers is a
                 cornerstone of science. However, one of the many issues
                 of our publication culture is that our publications
                 only publish a summary of the final result of a long
                 project. This means that we put well-polished graphs
                 describing (some) of our experimental results into our
                 publications. However, the algorithms, input datasets,
                 benchmarks, raw result datasets, as well as scripts
                 that were used to produce the graphs in the first place
                 are rarely published and typically not available to
                 other researchers. Often they are only available when
                 personally asking the authors. In many cases, however,
                 they are not available at all. This means from a long
                 workflow that led to producing a graph for a research
                 paper, we only publish the final result rather than the
                 entire workflow. This is unfortunate and has been
                 criticized in various scientific communities. In this
                 demo we argue that one part of the problem is our dated
                 view on what a ``document'' and hence ``a publication''
                 is, should, and can be. As a remedy, we introduce
                 portable database files (PDbF). These files are
                 janiform, i.e. they are at the same time a standard
                 static pdf as well as a highly dynamic (offline)
                 HTML-document. PDbFs allow you to access the raw data
                 behind a graph, perform OLAP-style analysis, and
                 reproduce your own graphs from the raw data --- all of
                 this within a portable document. We demo a tool
                 allowing you to create PDbFs smoothly from within
                 {\LaTeX}. This tool allows you to preserve the workflow
                 of raw measurement data to its final graphical output
                 through all processing steps. Notice that this pdf
                 already showcases our technology: rename this file to
                 ``.html'' and see what happens (currently we support
                 the desktop versions of Firefox, Chrome, and Safari).
                 But please: do not try to rename this file to ``.ova''
                 and mount it in VirtualBox.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schubert:2015:FCU,
  author =       "Erich Schubert and Alexander Koos and Tobias Emrich
                 and Andreas Z{\"u}fle and Klaus Arthur Schmid and
                 Arthur Zimek",
  title =        "A framework for clustering uncertain data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1976--1979",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824115",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The challenges associated with handling uncertain
                 data, in particular with querying and mining, are
                 finding increasing attention in the research community.
                 Here we focus on clustering uncertain data and describe
                 a general framework for this purpose that also allows
                 to visualize and understand the impact of
                 uncertainty---using different uncertainty models---on
                 the data mining results. Our framework constitutes
                 release 0.7 of ELKI (http://elki.dbs.ifi.lmu.de/) and
                 thus comes along with a plethora of implementations of
                 algorithms, distance measures, indexing techniques,
                 evaluation measures and visualization components.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bidoit:2015:EWA,
  author =       "Nicole Bidoit and Melanie Herschel and Katerina
                 Tzompanaki",
  title =        "{EFQ}: why-not answer polynomials in action",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1980--1983",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824116",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "One important issue in modern database applications is
                 supporting the user with efficient tools to debug and
                 fix queries because such tasks are both time and skill
                 demanding. One particular problem is known as Why-Not
                 question and focusses on the reasons for missing tuples
                 from query results. The EFQ platform demonstrated here
                 has been designed in this context to efficiently
                 leverage Why-Not Answers polynomials, a novel approach
                 that provides the user with complete explanations to
                 Why-Not questions and allows for automatic, relevant
                 query refinements.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2015:EDD,
  author =       "Xiaolan Wang and Mary Feng and Yue Wang and Xin Luna
                 Dong and Alexandra Meliou",
  title =        "Error diagnosis and data profiling with {DataXRay}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1984--1987",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824117",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of identifying and repairing data errors
                 has been an area of persistent focus in data management
                 research. However, while traditional data cleaning
                 techniques can be effective at identifying several data
                 discrepancies, they disregard the fact that many errors
                 are systematic, inherent to the process that produces
                 the data, and thus will keep occurring unless the root
                 cause is identified and corrected. In this
                 demonstration, we will present a large-scale diagnostic
                 framework called DataXRay. Like a medical X-ray that
                 aids the diagnosis of medical conditions by revealing
                 problems underneath the surface, DataXRay reveals
                 hidden connections and common properties among data
                 errors. Thus, in contrast to traditional cleaning
                 methods, which treat the symptoms, our system
                 investigates the underlying conditions that cause the
                 errors. The core of DataXRay combines an intuitive and
                 principled cost model derived by Bayesian analysis, and
                 an efficient, highly-parallelizable diagnostic
                 algorithm that discovers common properties among
                 erroneous data elements in a top-down fashion. Our
                 system has a simple interface that allows users to load
                 different datasets, to interactively adjust key
                 diagnostic parameters, to explore the derived
                 diagnoses, and to compare with solutions produced by
                 alternative algorithms. Through this demonstration,
                 participants will understand (1) the characteristics of
                 good diagnoses, (2) how and why errors occur in
                 real-world datasets, and (3) the distinctions with
                 other related problems and approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pham:2015:SRD,
  author =       "Quan Pham and Severin Thaler and Tanu Malik and Ian
                 Foster and Boris Glavic",
  title =        "Sharing and reproducing database applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1988--1991",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824118",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Sharing and repeating scientific applications is
                 crucial for verifying claims, reproducing experimental
                 results (e.g., to repeat a computational experiment
                 described in a publication), and promoting reuse of
                 complex applications. The predominant methods of
                 sharing and making applications repeatable are building
                 a companion web site and/or provisioning a virtual
                 machine image (VMI). Recently, application
                 virtualization (AV), has emerged as a light-weight
                 alternative for sharing and efficient repeatability. AV
                 approaches such as Linux Containers create a
                 chroot-like environment [4], while approaches such as
                 CDE [1] trace system calls during application execution
                 to copy all binaries, data, and software dependencies
                 into a self-contained package.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wylot:2015:DTT,
  author =       "Marcin Wylot and Philippe Cudr{\'e}-Mauroux and Paul
                 Groth",
  title =        "A demonstration of {TripleProv}: tracking and querying
                 provenance over {Web} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1992--1995",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824119",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The proliferation of heterogeneous Linked Data on the
                 Web poses new challenges to database systems. In
                 particular, the capacity to store, track, and query
                 provenance data is becoming a pivotal feature of modern
                 triple stores. In this demonstration, we present
                 TripleProv: a new system extending a native RDF store
                 to efficiently handle the storage, tracking and
                 querying of provenance in RDF data. In the following,
                 we give an overview of our approach providing a
                 reliable and understandable specification of the way
                 results were derived from the data and how particular
                 pieces of data were combined to answer the query.
                 Subsequently, we present techniques enabling to tailor
                 queries with provenance data. Finally, we describe our
                 demonstration and how the attendees will be able to
                 interact with our system during the conference.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ortona:2015:WJW,
  author =       "Stefano Ortona and Giorgio Orsi and Marcello
                 Buoncristiano and Tim Furche",
  title =        "{WADaR}: joint wrapper and data repair",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "1996--1999",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824120",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Web scraping (or wrapping) is a popular means for
                 acquiring data from the web. Recent advancements have
                 made scalable wrapper-generation possible and enabled
                 data acquisition processes involving thousands of
                 sources. This makes wrapper analysis and maintenance
                 both needed and challenging as no scalable tools exists
                 that support these tasks. We demonstrate WADaR, a
                 scalable and highly automated tool for joint wrapper
                 and data repair. WADaR uses off-the-shelf entity
                 recognisers to locate target entities in
                 wrapper-generated data. Markov chains are used to
                 determine structural repairs, that are then encoded
                 into suitable repairs for both the data and
                 corresponding wrappers. We show that WADaR is able to
                 increase the quality of wrapper-generated relations
                 between 15\% and 60\%, and to fully repair the
                 corresponding wrapper without any knowledge of the
                 original website in more than 50\% of the cases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bendre:2015:DUD,
  author =       "Mangesh Bendre and Bofan Sun and Ding Zhang and Xinyan
                 Zhou and Kevin Chen-Chuan Chang and Aditya
                 Parameswaran",
  title =        "{DataSpread}: unifying databases and spreadsheets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2000--2003",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824121",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Spreadsheet software is often the tool of choice for
                 ad-hoc tabular data management, processing, and
                 visualization, especially on tiny data sets. On the
                 other hand, relational database systems offer
                 significant power, expressivity, and efficiency over
                 spreadsheet software for data management, while lacking
                 in the ease of use and ad-hoc analysis capabilities. We
                 demonstrate DataSpread, a data exploration tool that
                 holistically unifies databases and spreadsheets. It
                 continues to offer a Microsoft Excel-based spreadsheet
                 front-end, while in parallel managing all the data in a
                 back-end database, specifically, PostgreSQL. DataSpread
                 retains all the advantages of spreadsheets, including
                 ease of use, ad-hoc analysis and visualization
                 capabilities, and a schema-free nature, while also
                 adding the advantages of traditional relational
                 databases, such as scalability and the ability to use
                 arbitrary SQL to import, filter, or join external or
                 internal tables and have the results appear in the
                 spreadsheet. DataSpread needs to reason about and
                 reconcile differences in the notions of schema,
                 addressing of cells and tuples, and the current
                 ``pane'' (which exists in spreadsheets but not in
                 traditional databases), and support data modifications
                 at both the front-end and the back-end. Our
                 demonstration will center on our first and early
                 prototype of the DataSpread, and will give the
                 attendees a sense for the enormous data exploration
                 capabilities offered by unifying spreadsheets and
                 databases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Haas:2015:WNS,
  author =       "Daniel Haas and Sanjay Krishnan and Jiannan Wang and
                 Michael J. Franklin and Eugene Wu",
  title =        "Wisteria: nurturing scalable data cleaning
                 infrastructure",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2004--2007",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824122",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analysts report spending upwards of 80\% of their time
                 on problems in data cleaning. The data cleaning process
                 is inherently iterative, with evolving cleaning
                 workflows that start with basic exploratory data
                 analysis on small samples of dirty data, then refine
                 analysis with more sophisticated/expensive cleaning
                 operators (e.g., crowdsourcing), and finally apply the
                 insights to a full dataset. While an analyst often
                 knows at a logical level what operations need to be
                 done, they often have to manage a large search space of
                 physical operators and parameters. We present Wisteria,
                 a system designed to support the iterative development
                 and optimization of data cleaning workflows, especially
                 ones that utilize the crowd. Wisteria separates logical
                 operations from physical implementations, and driven by
                 analyst feedback, suggests optimizations and/or
                 replacements to the analyst's choice of physical
                 implementation. We highlight research challenges in
                 sampling, in-flight operator replacement, and
                 crowdsourcing. We overview the system architecture and
                 these techniques, then provide a demonstration designed
                 to showcase how Wisteria can improve iterative data
                 analysis and cleaning. The code is available at:
                 http://www.sampleclean.org.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{S:2015:CDA,
  author =       "Ashoke S. and Jayant R. Haritsa",
  title =        "{CODD}: a dataless approach to big data testing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2008--2011",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824123",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The construction and development of the so-called Big
                 Data systems has occupied centerstage in the data
                 management community in recent years. However, there
                 has been comparatively little attention paid to the
                 testing of such systems, an essential pre-requisite for
                 successful deployment. This is surprising given that
                 traditional testing techniques, which typically involve
                 construction of representative databases and regression
                 query suites, are completely impractical at Big Data
                 scale --- simply due to the time and space overheads
                 involved in their execution. For instance, consider the
                 situation where a database engineer wishes to evaluate
                 the query optimizer's behavior on a futuristic Big Data
                 setup featuring ``yottabyte'' ($ 10^{24} $ bytes) sized
                 relational tables. Obviously, just generating this
                 data, let alone storing it, is practically infeasible
                 even on the best of systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cebiric:2015:QOS,
  author =       "{\v{S}}ejla {\v{C}}ebiri{\'c} and Fran{\c{c}}ois Goasdou{\'e} and
                 Ioana Manolescu",
  title =        "Query-oriented summarization of {RDF} graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2012--2015",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824124",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Resource Description Framework (RDF) is a
                 graph-based data model promoted by the W3C as the
                 standard for Semantic Web applications. Its associated
                 query language is SPARQL. RDF graphs are often large
                 and varied, produced in a variety of contexts, e.g.,
                 scientific applications, social or online media,
                 government data etc. They are heterogeneous, i.e.,
                 resources described in an RDF graph may have very
                 different sets of properties. An RDF resource may have:
                 no types, one or several types (which may or may not be
                 related to each other). RDF Schema (RDFS) information
                 may optionally be attached to an RDF graph, to enhance
                 the description of its resources. Such statements also
                 entail that in an RDF graph, some data is implicit.
                 According to the W3C RDF and SPARQL specification, the
                 semantics of an RDF graph comprises both its explicit
                 and implicit data; in particular, SPARQL query answers
                 must be computed reflecting both the explicit and
                 implicit data. These features make RDF graphs complex,
                 both structurally and conceptually. It is intrinsically
                 hard to get familiar with a new RDF dataset, especially
                 if an RDF schema is sparse or not available at all.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chodpathumwan:2015:UDT,
  author =       "Yodsawalai Chodpathumwan and Amirhossein Aleyasen and
                 Arash Termehchy and Yizhou Sun",
  title =        "{Universal-DB}: towards representation independent
                 graph analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2016--2019",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824125",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph analytics algorithms leverage quantifiable
                 structural properties of the data to predict
                 interesting concepts and relationships. The same
                 information, however, can be represented using many
                 different structures and the structural properties
                 observed over particular representations do not
                 necessarily hold for alternative structures. Because
                 these algorithms tend to be highly effective over some
                 choices of structure, such as that of the databases
                 used to validate them, but not so effective with
                 others, graph analytics has largely remained the
                 province of experts who can find the desired forms for
                 these algorithms. We argue that in order to make graph
                 analytics usable, we should develop systems that are
                 effective over a wide range of choices of structural
                 organizations. We demonstrate Universal-DB an entity
                 similarity and proximity search system that returns the
                 same answers for a query over a wide range of choices
                 to represent the input database.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mahmood:2015:TDS,
  author =       "Ahmed R. Mahmood and Ahmed M. Aly and Thamir Qadah and
                 El Kindi Rezig and Anas Daghistani and Amgad Madkour
                 and Ahmed S. Abdelhamid and Mohamed S. Hassan and Walid
                 G. Aref and Saleh Basalamah",
  title =        "{Tornado}: a distributed spatio-textual stream
                 processing system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2020--2023",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824126",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The widespread use of location-aware devices together
                 with the increased popularity of micro-blogging
                 applications (e.g., Twitter) led to the creation of
                 large streams of spatio-textual data. In order to serve
                 real-time applications, the processing of these
                 large-scale spatio-textual streams needs to be
                 distributed. However, existing distributed stream
                 processing systems (e.g., Spark and Storm) are not
                 optimized for spatial/textual content. In this
                 demonstration, we introduce Tornado, a distributed
                 in-memory spatio-textual stream processing server that
                 extends Storm. To efficiently process spatio-textual
                 streams, Tornado introduces a spatio-textual indexing
                 layer to the architecture of Storm. The indexing layer
                 is adaptive, i.e., dynamically re-distributes the
                 processing across the system according to changes in
                 the data distribution and/or query workload. In
                 addition to keywords, higher-level textual concepts are
                 identified and are semantically matched against
                 spatio-textual queries. Tornado provides data
                 deduplication and fusion to eliminate redundant textual
                 data. We demonstrate a prototype of Tornado running
                 against real Twitter streams, where the users can
                 register continuous or snapshot spatio-textual queries
                 using a map-assisted query-interface.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Crotty:2015:VIA,
  author =       "Andrew Crotty and Alex Galakatos and Emanuel Zgraggen
                 and Carsten Binnig and Tim Kraska",
  title =        "{Vizdom}: interactive analytics through pen and
                 touch",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2024--2027",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824127",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine learning (ML) and advanced statistics are
                 important tools for drawing insights from large
                 datasets. However, these techniques often require human
                 intervention to steer computation towards meaningful
                  results. In this demo, we present Vizdom, a new system
                 for interactive analytics through pen and touch.
                 Vizdom's frontend allows users to visually compose
                 complex workflows of ML and statistics operators on an
                 interactive whiteboard, and the back-end leverages
                 recent advances in workflow compilation techniques to
                 run these computations at interactive speeds.
                 Additionally, we are exploring approximation techniques
                 for quickly visualizing partial results that
                 incrementally refine over time. This demo will show
                 Vizdom's capabilities by allowing users to
                 interactively build complex analytics workflows using
                 real-world datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Consens:2015:SCE,
  author =       "Mariano P. Consens and Valeria Fionda and Shahan
                 Khatchadourian and Giuseppe Pirr{\`o}",
  title =        "{S+EPPs}: construct and explore bisimulation
                 summaries, plus optimize navigational queries; all on
                 existing {SPARQL} systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2028--2031",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824128",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate S+EPPs, a system that provides fast
                 construction of bisimulation summaries using graph
                 analytics platforms, and then enhances existing SPARQL
                 engines to support summary-based exploration and
                 navigational query optimization. The construction
                 component adds a novel optimization to a parallel
                 bisimulation algorithm implemented on a multi-core
                 graph processing framework. We show that for several
                 large, disk resident, real world graphs, full summary
                 construction can be completed in roughly the same time
                 as the data load. The query translation component
                 supports Extended Property Paths (EPPs), an enhancement
                 of SPARQL 1.1 property paths that can express a
                 significantly larger class of navigational queries.
                 EPPs are implemented via rewritings into a widely used
                 SPARQL subset. The optimization component can
                 (transparently to users) translate EPPs defined on
                 instance graphs into EPPs that take advantage of
                 bisimulation summaries. S+EPPs combines the query and
                 optimization translations to enable summary-based
                 optimization of graph traversal queries on top of
                 off-the-shelf SPARQL processors. The demonstration
                 showcases the construction of bisimulation summaries of
                 graphs (ranging from millions to billions of edges),
                 together with the exploration benefits and the
                 navigational query speedups obtained by leveraging
                 summaries stored alongside the original datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xirogiannopoulos:2015:GEI,
  author =       "Konstantinos Xirogiannopoulos and Udayan Khurana and
                 Amol Deshpande",
  title =        "{GraphGen}: exploring interesting graphs in relational
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2032--2035",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824129",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analyzing interconnection structures among the data
                 through the use of graph algorithms and graph analytics
                 has been shown to provide tremendous value in many
                 application domains. However, graphs are not the
                 primary choice for how most data is currently stored,
                 and users who want to employ graph analytics are forced
                 to extract data from their data stores, construct the
                 requisite graphs, and then use a specialized engine to
                 write and execute their graph analysis tasks. This
                 cumbersome and costly process not only raises barriers
                 in using graph analytics, but also makes it hard to
                 explore and identify hidden or implicit graphs in the
                  data. Here we demonstrate a system, called GraphGen,
                 that enables users to declaratively specify graph
                 extraction tasks over relational databases, visually
                 explore the extracted graphs, and write and execute
                 graph algorithms over them, either directly or using
                 existing graph libraries like the widely used NetworkX
                 Python library. We also demonstrate how unifying the
                 extraction tasks and the graph algorithms enables
                 significant optimizations that would not be possible
                 otherwise.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yoon:2015:DPF,
  author =       "Dong Young Yoon and Barzan Mozafari and Douglas P.
                 Brown",
  title =        "{DBSeer}: pain-free database administration through
                 workload intelligence",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2036--2039",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824130",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The pressing need for achieving and maintaining high
                 performance in database systems has made database
                 administration one of the most stressful jobs in
                 information technology. On the other hand, the
                 increasing complexity of database systems has made
                 qualified database administrators (DBAs) a scarce
                 resource. DBAs are now responsible for an array of
                 demanding tasks; they need to (i) provision and tune
                 their database according to their application
                 requirements, (ii) constantly monitor their database
                 for any performance failures or slowdowns, (iii)
                 diagnose the root cause of the performance problem in
                 an accurate and timely fashion, and (iv) take prompt
                 actions that can restore acceptable database
                 performance. However, much of the research in the past
                 years has focused on improving the raw performance of
                 the database systems, rather than improving their
                 manageability. Besides sophisticated consoles for
                 monitoring performance and a few auto-tuning wizards,
                 DBAs are not provided with any help other than their
                 own many years of experience. Typically, their only
                 resort is trial-and-error, which is a tedious, ad-hoc
                 and often sub-optimal solution. In this demonstration,
                 we present DBSeer, a workload intelligence framework
                 that exploits advanced machine learning and causality
                 techniques to aid DBAs in their various
                 responsibilities. DBSeer analyzes large volumes of
                 statistics and telemetry data collected from various
                 log files to provide the DBA with a suite of rich
                 functionalities including performance prediction,
                 performance diagnosis, bottleneck explanation, workload
                 insight, optimal admission control, and what-if
                 analysis. In this demo, we showcase various features of
                 DBSeer by predicting and analyzing the performance of a
                  live database system. We will also reproduce a number of
                 realistic performance problems in the system, and allow
                 the audience to use DBSeer to quickly diagnose and
                 resolve their root cause.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kejariwal:2015:RTA,
  author =       "Arun Kejariwal and Sanjeev Kulkarni and Karthik
                 Ramasamy",
  title =        "Real time analytics: algorithms and systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2040--2041",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824132",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Velocity is one of the 4 Vs commonly used to
                 characterize Big Data [5]. In this regard, Forrester
                 remarked the following in Q3 2014 [8]: ``The high
                 velocity, white-water flow of data from innumerable
                 real-time data sources such as market data, Internet of
                 Things, mobile, sensors, click-stream, and even
                 transactions remain largely unnavigated by most firms.
                 The opportunity to leverage streaming analytics has
                 never been greater.'' Example use cases of streaming
                 analytics include, but not limited to: (a)
                 visualization of business metrics in real-time (b)
                 facilitating highly personalized experiences (c)
                 expediting response during emergencies. Streaming
                 analytics is extensively used in a wide variety of
                 domains such as healthcare, e-commerce, financial
                 services, telecommunications, energy and utilities,
                 manufacturing, government and transportation. In this
                 tutorial, we shall present an in-depth overview of
                 streaming analytics --- applications, algorithms and
                 platforms --- landscape. We shall walk through how the
                 field has evolved over the last decade and then discuss
                 the current challenges --- the impact of the other
                  three Vs, viz., Volume, Variety and Veracity, on
                 Big Data streaming analytics. The tutorial is intended
                 for both researchers and practitioners in the industry.
                 We shall also present state-of-the-affairs of streaming
                 analytics at Twitter.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khan:2015:UGM,
  author =       "Arijit Khan and Lei Chen",
  title =        "On uncertain graphs modeling and queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2042--2043",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824133",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale, highly-interconnected networks pervade
                 both our society and the natural world around us.
                 Uncertainty, on the other hand, is inherent in the
                 underlying data due to a variety of reasons, such as
                 noisy measurements, lack of precise information needs,
                 inference and prediction models, or explicit
                 manipulation, e.g., for privacy purposes. Therefore,
                 uncertain, or probabilistic, graphs are increasingly
                 used to represent noisy linked data in many emerging
                 application scenarios, and they have recently become a
                 hot topic in the database research community. While
                 many classical graph algorithms such as reachability
                  and shortest path queries become \#P-complete, and
                 hence, more expensive in uncertain graphs; various
                 complex queries are also emerging over uncertain
                 networks, such as pattern matching, information
                 diffusion, and influence maximization queries. In this
                 tutorial, we discuss the sources of uncertain graphs
                 and their applications, uncertainty modeling, as well
                 as the complexities and algorithmic advances on
                 uncertain graphs processing in the context of both
                 classical and emerging graph queries. We emphasize the
                 current challenges and highlight some future research
                 directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2015:TMI,
  author =       "Xin Luna Dong and Wang-Chiew Tan",
  title =        "A time machine for information: looking back to look
                 forward",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2044--2045",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824134",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the abundant availability of information one can
                 mine from the Web today, there is increasing interest
                 to develop a complete understanding of the history of
                 an entity (i.e., a person, a company, a music genre, a
                 country, etc.) (see, for example, [7, 9, 10, 11]) and
                 to depict trends over time [5, 12, 13]. This, however,
                 remains a largely difficult and manual task despite
                 more than a couple of decades of research in the areas
                 of temporal databases and data integration.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Das:2015:SAS,
  author =       "Mahashweta Das and Gautam Das",
  title =        "Structured analytics in social media",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2046--2047",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824135",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The rise of social media has turned the Web into an
                 online community where people connect, communicate, and
                 collaborate with each other. Structured analytics in
                 social media is the process of discovering the
                 structure of the relationships emerging from this
                 social media use. It focuses on identifying the users
                 involved, the activities they undertake, the actions
                 they perform, and the items (e.g., movies, restaurants,
                 blogs, etc.) they create and interact with. There are
                 two key challenges facing these tasks: how to organize
                 and model social media content, which is often
                 unstructured in its raw form, in order to employ
                 structured analytics on it; and how to employ analytics
                 algorithms to capture both explicit link-based
                 relationships and implicit behavior-based
                 relationships. In this tutorial, we systemize and
                 summarize the research so far in analyzing social
                 interactions between users and items in the Web from
                 data mining and database perspectives. We start with a
                 general overview of the topic, including discourse to
                 various exciting and practical applications. Then, we
                 discuss the state-of-art for modeling the data,
                 formalizing the mining task, developing the algorithmic
                 solutions, and evaluating on real datasets. We also
                 emphasize open problems and challenges for future
                 research in the area of structured analytics and social
                 media.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gao:2015:TDC,
  author =       "Jing Gao and Qi Li and Bo Zhao and Wei Fan and Jiawei
                 Han",
  title =        "Truth discovery and crowdsourcing aggregation: a
                 unified perspective",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2048--2049",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824136",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the era of Big Data, data entries, even describing
                 the same objects or events, can come from a variety of
                 sources, where a data source can be a web page, a
                 database or a person. Consequently, conflicts among
                 sources become inevitable. To resolve the conflicts and
                 achieve high quality data, truth discovery and
                 crowdsourcing aggregation have been studied
                 intensively. However, although these two topics have a
                 lot in common, they are studied separately and are
                 applied to different domains. To answer the need of a
                 systematic introduction and comparison of the two
                 topics, we present an organized picture on truth
                 discovery and crowdsourcing aggregation in this
                 tutorial. They are compared on both theory and
                 application levels, and their related areas as well as
                 open questions are discussed.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abadi:2015:SHS,
  author =       "Daniel Abadi and Shivnath Babu and Fatma {\"O}zcan and
                 Ippokratis Pandis",
  title =        "{SQL-on-Hadoop} systems: tutorial",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2050--2051",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824137",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Enterprises are increasingly using Apache Hadoop, more
                 specifically HDFS, as a central repository for all
                 their data; data coming from various sources, including
                 operational systems, social media and the web, sensors
                 and smart devices, as well as their applications. At
                 the same time many enterprise data management tools
                 (e.g. from SAP ERP and SAS to Tableau) rely on SQL and
                 many enterprise users are familiar and comfortable with
                 SQL. As a result, SQL processing over Hadoop data has
                 gained significant traction over the recent years, and
                 the number of systems that provide such capability has
                 increased significantly. In this tutorial we use the
                 term SQL-on-Hadoop to refer to systems that provide
                 some level of declarative SQL(-like) processing over
                 HDFS and noSQL data sources, using architectures that
                 include computational or storage engines compatible
                 with Apache Hadoop.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Loaiza:2015:EDH,
  author =       "Juan Loaiza",
  title =        "Engineering database hardware and software together",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2052--2052",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824139",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Since its inception, Oracle's database software
                 primarily ran on customer configured off-the-shelf
                 hardware. A decade ago, the architecture of
                 conventional systems started to become a bottleneck and
                 Oracle developed the Oracle Exadata Database Machine to
                 optimize the full hardware and software stack for
                 database workloads. Exadata is based on a scale-out
                 architecture of database servers and storage servers
                 that optimizes both OLTP and Analytic workloads while
                 hosting hundreds of databases simultaneously on the
                 same system. By using database specific protocols for
                 storage and networking we bypass limitations imposed by
                 conventional network and storage layers. Exadata is now
                 deployed at thousands of Enterprises including 4 of the
                 5 largest banks, telecoms, and retailers for varied
                 workloads such as interbank funds transfers,
                 e-commerce, ERP, Cloud SaaS applications, and petabyte
                 data warehouses. Five years ago, Oracle initiated a
                 project to extend our database stack beyond software
                 and systems and into the architecture of the
                 microprocessor itself. The goal of this effort is to
                 dramatically improve the performance, reliability and
                 cost effectiveness of a new generation of database
                 machines. The new SPARC M7 processor is the first step.
                 The M7 is an extraordinarily fast conventional
                 processor with 32-cores per socket and an extremely
                 high bandwidth memory system. Added to its conventional
                 processing capabilities are 32 custom on-chip database
                 co-processors that run database searches at full memory
                 bandwidth rates, and decompress data in real-time to
                 increase memory bandwidth and capacity. Further, the M7
                 implements innovative fine-grained memory protection to
                 secure sensitive business data. In the presentation we
                 will describe how Oracle's engineering teams integrate
                 software and hardware at all levels to achieve
                 breakthrough performance, reliability, and security for
                 the database and rest of the modern data processing
                 stack.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Balazinska:2015:BDR,
  author =       "Magdalena Balazinska",
  title =        "Big data research: will industry solve all the
                 problems?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2053--2056",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824140",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The need for effective tools for big data
                 management and analytics continues to grow. While the
                 ecosystem of tools is expanding, many research problems
                 remain open: they include challenges around efficient
                 processing, flexible analytics, ease of use, and
                 operation as a service. Many new systems and much
                 innovation, however, come from industry (or from
                 academic projects that quickly became big players in
                 industry). An important question for our community is
                 whether industry will solve all the problems or whether
                 there is a place for academic research in big data and
                 what is that place. In this paper, we address this
                 question by looking back at our research on the Nuage,
                 CQMS, Myria, and Data Pricing projects, and the SciDB
                 collaboration.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Walter:2015:BPB,
  author =       "Todd Walter",
  title =        "Big plateaus of {Big Data} on the big island",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2057--2057",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824141",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In ancient texts, 40 was a magic number. It meant a
                 ``lot'' or ``a long time.'' 40 years represented the
                 time it took for a new generation to arise. A look back
                 at 40 years of VLDB suggests that this applies to
                 database researchers as well --- the young researchers
                 of the early VLDBs are now the old folks of the
                 database world, and a new generation is creating
                 afresh. Over this period many plateaus of ``Big Data''
                 have challenged the database community and been
                 conquered. But there is still no free lunch ---
                 database research is really the science of trade-offs,
                 many of which are no different today than 40 years ago.
                 And of course the evolution of hardware technology
                 continues to swing the trade-off pendulum while
                 enabling new plateaus to be reached. Todd will take a
                 look back at customer big data plateaus of the past. He
                 will look at where we are today, then use his crystal
                 ball and the lessons of the past to extrapolate the
                 next several plateaus --- how they will be the same and
                 how will they be different. Along the way we will have
                 a little fun with some VLDB and Teradata history.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ailamaki:2015:DHB,
  author =       "Anastasia Ailamaki",
  title =        "Databases and hardware: the beginning and sequel of a
                 beautiful friendship",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "12",
  pages =        "2058--2061",
  month =        aug,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2824032.2824142",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 16 18:23:11 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Fast query and transaction processing is the goal of
                 40 years of database research and the reason of
                 existence for many new database system architectures.
                 In data management, system performance means acceptable
                 response time and throughput on critical-path
                 operations, ideally with scalability guarantees.
                 Performance is improved with top-of-the line research
                 on data processing algorithms; efficiency, however, is
                 contingent on seamless collaboration between the
                 database software and hardware and storage devices. In
                 1980, the goal was to minimize disk accesses; in 2000,
                 memory replaced disks in terms of access costs.
                 Nowadays performance is synonymous to scalability;
                 scalability, in turn, translates into sustainable and
                 predictable use of hardware resources in the face of
                 embarrassing parallelism and deep storage hierarchies
                 while minimizing energy needs --- a challenging goal in
                 multiple dimensions. We discuss work done in the past
                 four decades to tighten the interaction between the
                 database software and underlying hardware and show
                 that, as application and microarchitecture roadmaps
                 evolve, the effort of maintaining smooth collaboration
                 blossoms into a multitude of interesting research
                 avenues with direct technological impact.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aly:2015:AAQ,
  author =       "Ahmed M. Aly and Ahmed R. Mahmood and Mohamed S.
                 Hassan and Walid G. Aref and Mourad Ouzzani and Hazem
                 Elmeleegy and Thamir Qadah",
  title =        "{AQWA}: adaptive query workload aware partitioning of
                 big spatial data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2062--2073",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831361",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The unprecedented spread of location-aware devices has
                 resulted in a plethora of location-based services in
                 which huge amounts of spatial data need to be
                 efficiently processed by large-scale computing
                 clusters. Existing cluster-based systems for processing
                 spatial data employ static data-partitioning structures
                 that cannot adapt to data changes, and that are
                 insensitive to the query workload. Hence, these systems
                 are incapable of consistently providing good
                 performance. To close this gap, we present AQWA, an
                 adaptive and query-workload-aware mechanism for
                 partitioning large-scale spatial data. AQWA does not
                 assume prior knowledge of the data distribution or the
                 query workload. Instead, as data is consumed and
                 queries are processed, the data partitions are
                 incrementally updated. With extensive experiments using
                 real spatial data from Twitter, and various workloads
                 of range and $k$-nearest-neighbor queries, we
                 demonstrate that AQWA can achieve an order of magnitude
                 enhancement in query performance compared to the
                 state-of-the-art systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khayyat:2015:LFS,
  author =       "Zuhair Khayyat and William Lucia and Meghna Singh and
                 Mourad Ouzzani and Paolo Papotti and Jorge-Arnulfo
                 Quian{\'e}-Ruiz and Nan Tang and Panos Kalnis",
  title =        "Lightning fast and space efficient inequality joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2074--2085",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831362",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See erratum \cite{Khayyat:2017:ELF}.",
  abstract =     "Inequality joins, which join relational tables on
                 inequality conditions, are used in various
                 applications. While there have been a wide range of
                 optimization methods for joins in database systems,
                 from algorithms such as sort-merge join and band join,
                 to various indices such as B$^+$-tree, R$^*$-tree and
                 Bitmap, inequality joins have received little attention
                 and queries containing such joins are usually very
                 slow. In this paper, we introduce fast inequality join
                 algorithms. We put columns to be joined in sorted
                 arrays and we use permutation arrays to encode
                 positions of tuples in one sorted array w.r.t. the
                 other sorted array. In contrast to sort-merge join, we
                 use space efficient bit-arrays that enable
                 optimizations, such as Bloom filter indices, for fast
                 computation of the join results. We have implemented a
                 centralized version of these algorithms on top of
                 PostgreSQL, and a distributed version on top of Spark
                 SQL. We have compared against well known optimization
                 techniques for inequality joins and show that our
                 solution is more scalable and several orders of
                 magnitude faster.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2015:FPO,
  author =       "Jinfei Liu and Li Xiong and Jian Pei and Jun Luo and
                 Haoyu Zhang",
  title =        "Finding {Pareto} optimal groups: group-based skyline",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2086--2097",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831363",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Skyline computation, aiming at identifying a set of
                 skyline points that are not dominated by any other
                 point, is particularly useful for multi-criteria data
                 analysis and decision making. Traditional skyline
                 computation, however, is inadequate to answer queries
                 that need to analyze not only individual points but
                 also groups of points. To address this gap, we
                 generalize the original skyline definition to the novel
                 group-based skyline (G-Skyline), which represents
                 Pareto optimal groups that are not dominated by other
                 groups. In order to compute G-Skyline groups consisting
                 of k points efficiently, we present a novel structure
                 that represents the points in a directed skyline graph
                 and captures the dominance relationships among the
                 points based on the first k skyline layers. We propose
                 efficient algorithms to compute the first k skyline
                 layers. We then present two heuristic algorithms to
                 efficiently compute the G-Skyline groups: the
                 point-wise algorithm and the unit group-wise algorithm,
                 using various pruning strategies. The experimental
                 results on the real NBA dataset and the synthetic
                 datasets show that G-Skyline is interesting and useful,
                 and our algorithms are efficient and scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Faulkner:2015:RQN,
  author =       "Taylor Kessler Faulkner and Will Brackenbury and
                 Ashwin Lall",
  title =        "$k$-regret queries with nonlinear utilities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2098--2109",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831364",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In exploring representative databases, a primary issue
                 has been finding accurate models of user preferences.
                 Given this, our work generalizes the method of regret
                 minimization as proposed by Nanongkai et al. to include
                 nonlinear utility functions. Regret minimization is an
                 approach for selecting k representative points from a
                 database such that every user's ideal point in the
                 entire database is similar to one of the k points. This
                 approach combines benefits of the methods top- k and
                 skyline; it controls the size of the output but does
                 not require knowledge of users' preferences. Prior work
                 with $k$-regret queries assumes users' preferences to be
                 modeled by linear utility functions. In this paper, we
                 derive upper and lower bounds for nonlinear utility
                 functions, as these functions can better fit
                 occurrences such as diminishing marginal returns,
                 propensity for risk, and substitutability of
                 preferences. To model these phenomena, we analyze a
                 broad subset of convex, concave, and constant
                 elasticity of substitution functions. We also run
                 simulations on real and synthetic data to prove the
                 efficacy of our bounds in practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shi:2015:CTM,
  author =       "Juwei Shi and Yunjie Qiu and Umar Farooq Minhas and
                 Limei Jiao and Chen Wang and Berthold Reinwald and
                 Fatma {\"O}zcan",
  title =        "Clash of the titans: {MapReduce} vs. {Spark} for large
                 scale data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2110--2121",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831365",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MapReduce and Spark are two very popular open source
                 cluster computing frameworks for large scale data
                 analytics. These frameworks hide the complexity of task
                 parallelism and fault-tolerance, by exposing a simple
                 programming API to users. In this paper, we evaluate
                 the major architectural components in MapReduce and
                 Spark frameworks including: shuffle, execution model,
                 and caching, by using a set of important analytic
                 workloads. To conduct a detailed analysis, we developed
                 two profiling tools: (1) We correlate the task
                 execution plan with the resource utilization for both
                 MapReduce and Spark, and visually present this
                 correlation; (2) We provide a break-down of the task
                 execution time for in-depth analysis. Through detailed
                 experiments, we quantify the performance differences
                 between MapReduce and Spark. Furthermore, we attribute
                 these performance differences to different components
                 which are architected differently in the two
                 frameworks. We further expose the source of these
                 performance differences by using a set of
                 micro-benchmark experiments. Overall, our experiments
                 show that Spark is about 2.5x, 5x, and 5x faster than
                 MapReduce, for Word Count, k-means, and PageRank,
                 respectively. The main causes of these speedups are the
                 efficiency of the hash-based aggregation component for
                 combine, as well as reduced CPU and disk overheads due
                 to RDD caching in Spark. An exception to this is the
                 Sort workload, for which MapReduce is 2x faster than
                 Spark. We show that MapReduce's execution model is more
                 efficient for shuffling data than Spark, thus making
                 Sort run faster on MapReduce.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2015:TMI,
  author =       "Yu Liu and Jiaheng Lu and Hua Yang and Xiaokui Xiao
                 and Zhewei Wei",
  title =        "Towards maximum independent sets on massive graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2122--2133",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831366",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Maximum independent set (MIS) is a fundamental problem
                 in graph theory and it has important applications in
                 many areas such as social network analysis, graphical
                 information systems and coding theory. The problem is
                 NP-hard, and there has been numerous studies on its
                 approximate solutions. While successful to a certain
                 degree, the existing methods require memory space at
                 least linear in the size of the input graph. This has
                 become a serious concern in view of the massive volume
                 of today's fast-growing graphs. In this paper, we study
                 the MIS problem under the semi-external setting, which
                 assumes that the main memory can accommodate all
                 vertices of the graph but not all edges. We present a
                 greedy algorithm and a general vertex-swap framework,
                 which swaps vertices to incrementally increase the size
                 of independent sets. Our solutions require only few
                 sequential scans of graphs on the disk file, thus
                 enabling in-memory computation without costly random
                 disk accesses. Experiments on large-scale datasets show
                 that our solutions are able to compute a large
                 independent set for a massive graph with 59 million
                 vertices and 151 million edges using a commodity
                 machine, with a memory cost of 469MB and a time cost of
                 three minutes, while yielding an approximation ratio
                 that is around 99\% of the theoretical optimum.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Meehan:2015:SSM,
  author =       "John Meehan and Nesime Tatbul and Stan Zdonik and
                 Cansu Aslantas and Ugur Cetintemel and Jiang Du and Tim
                 Kraska and Samuel Madden and David Maier and Andrew
                 Pavlo and Michael Stonebraker and Kristin Tufte and Hao
                 Wang",
  title =        "{S-Store}: streaming meets transaction processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2134--2145",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831367",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream processing addresses the needs of real-time
                 applications. Transaction processing addresses the
                 coordination and safety of short atomic computations.
                 Heretofore, these two modes of operation existed in
                 separate, stove-piped systems. In this work, we attempt
                 to fuse the two computational paradigms in a single
                 system called S-Store. In this way, S-Store can
                 simultaneously accommodate OLTP and streaming
                 applications. We present a simple transaction model for
                 streams that integrates seamlessly with a traditional
                 OLTP system, and provides both ACID and stream-oriented
                 guarantees. We chose to build S-Store as an extension
                 of H-Store --- an open-source, in-memory, distributed
                 OLTP database system. By implementing S-Store in this
                 way, we can make use of the transaction processing
                 facilities that H-Store already provides, and we can
                 concentrate on the additional features that are needed
                 to support streaming. Similar implementations could be
                 done using other main-memory OLTP platforms. We show
                 that we can actually achieve higher throughput for
                 streaming workloads in S-Store than an equivalent
                 deployment in H-Store alone. We also show how this can
                 be achieved within H-Store with the addition of a
                 modest amount of new functionality. Furthermore, we
                 compare S-Store to two state-of-the-art streaming
                 systems, Esper and Apache Storm, and show how S-Store
                 can sometimes exceed their performance while at the
                 same time providing stronger correctness guarantees.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Levandoski:2015:MVR,
  author =       "Justin Levandoski and David Lomet and Sudipta Sengupta
                 and Ryan Stutsman and Rui Wang",
  title =        "Multi-version range concurrency control in
                 {Deuteronomy}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2146--2157",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831368",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Deuteronomy transactional key value store executes
                 millions of serializable transactions/second by
                 exploiting multi-version timestamp order concurrency
                 control. However, it has not supported range
                 operations, only individual record operations (e.g.,
                 create, read, update, delete). In this paper, we
                 enhance our multi-version timestamp order technique to
                 handle range concurrency and prevent phantoms.
                 Importantly, we maintain high performance while
                 respecting the clean separation of duties required by
                 Deuteronomy, where a transaction component performs
                 purely logical concurrency control (including range
                 support), while a data component performs data storage
                 and management duties. Like the rest of the Deuteronomy
                 stack, our range technique manages concurrency
                 information in a latch-free manner. With our range
                 enhancement, Deuteronomy can reach scan speeds of
                 nearly 250 million records/s (more than 27 GB/s) on
                 modern hardware, while providing serializable isolation
                 complete with phantom prevention.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2015:QEI,
  author =       "Hao Li and Chee-Yong Chan and David Maier",
  title =        "Query from examples: an iterative, data-driven
                 approach to query construction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2158--2169",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831369",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we propose a new approach, called Query
                 from Examples (QFE), to help non-expert database users
                 construct SQL queries. Our approach, which is designed
                 for users who might be unfamiliar with SQL, only
                 requires that the user is able to determine whether a
                 given output table is the result of his or her intended
                 query on a given input database. To kick-start the
                 construction of a target query Q, the user first
                 provides a pair of inputs: a sample database D and an
                 output table R which is the result of Q on D. As there
                 will be many candidate queries that transform D to R,
                 QFE winnows this collection by presenting the user with
                 new database-result pairs that distinguish these
                 candidates. Unlike previous approaches that use
                 synthetic data for such pairs, QFE strives to make
                 these distinguishing pairs as close to the original
                 $(D, R)$ pair as possible. By doing so, it seeks to
                 minimize the effort needed by a user to determine if a
                 new database-result pair is consistent with his or her
                 desired query. We demonstrate the effectiveness and
                 efficiency of our approach using real datasets from
                 SQLShare, a cloud-based platform designed to help
                 scientists utilize RDBMS technology for data
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Galhotra:2015:TCR,
  author =       "Sainyam Galhotra and Amitabha Bagchi and Srikanta
                 Bedathur and Maya Ramanath and Vidit Jain",
  title =        "Tracking the conductance of rapidly evolving
                 topic-subgraphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2170--2181",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831370",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Monitoring the formation and evolution of communities
                 in large online social networks such as Twitter is an
                 important problem that has generated considerable
                 interest in both industry and academia. Fundamentally,
                 the problem can be cast as studying evolving subgraphs
                 (each subgraph corresponding to a topical community) on
                 an underlying social graph --- with users as nodes and
                 the connection between them as edges. A key metric of
                 interest in this setting is tracking the changes to the
                 conductance of subgraphs induced by edge activations.
                 This metric quantifies how well or poorly connected a
                 subgraph is to the rest of the graph relative to its
                 internal connections. Conductance has been demonstrated
                 to be of great use in many applications, such as
                 identifying bursty topics, tracking the spread of
                 rumors, and so on. However, tracking this simple metric
                 presents a considerable scalability challenge --- the
                 underlying social network is large, the number of
                 communities that are active at any moment is large, the
                 rate at which these communities evolve is high, and
                 moreover, we need to track conductance in real-time. We
                 address these challenges in this paper. We propose an
                 in-memory approximation called BloomGraphs to store and
                 update these (possibly overlapping) evolving subgraphs.
                 As the name suggests, we use Bloom filters to represent
                 an approximation of the underlying graph. This
                 representation is compact and computationally efficient
                 to maintain in the presence of updates. This is
                 especially important when we need to simultaneously
                 maintain thousands of evolving subgraphs. BloomGraphs
                 are used in computing and tracking conductance of these
                 subgraphs as edge-activations arrive. BloomGraphs have
                 several desirable properties in the context of this
                 application, including a small memory footprint and
                 efficient updateability. We also demonstrate
                 mathematically that the error incurred in computing
                 conductance is one-sided and that in the case of
                 evolving subgraphs the change in approximate
                 conductance has the same sign as the change in exact
                 conductance in most cases. We validate the
                 effectiveness of BloomGraphs through extensive
                 experimentation on large Twitter graphs and other
                 social networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vartak:2015:SED,
  author =       "Manasi Vartak and Sajjadur Rahman and Samuel Madden
                 and Aditya Parameswaran and Neoklis Polyzotis",
  title =        "{SeeDB}: efficient data-driven visualization
                 recommendations to support visual analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2182--2193",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831371",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data analysts often build visualizations as the first
                 step in their analytical workflow. However, when
                 working with high-dimensional datasets, identifying
                 visualizations that show relevant or desired trends in
                 data can be laborious. We propose SeeDB, a
                 visualization recommendation engine to facilitate fast
                 visual analysis: given a subset of data to be studied,
                 SeeDB intelligently explores the space of
                 visualizations, evaluates promising visualizations for
                 trends, and recommends those it deems most ``useful''
                 or ``interesting''. The two major obstacles in
                 recommending interesting visualizations are (a) scale:
                 evaluating a large number of candidate visualizations
                 while responding within interactive time scales, and
                 (b) utility: identifying an appropriate metric for
                 assessing interestingness of visualizations. For the
                 former, SeeDB introduces pruning optimizations to
                 quickly identify high-utility visualizations and
                 sharing optimizations to maximize sharing of
                 computation across visualizations. For the latter, as a
                 first step, we adopt a deviation-based metric for
                 visualization utility, while indicating how we may be
                 able to generalize it to other factors influencing
                 utility. We implement SeeDB as a middleware layer that
                 can run on top of any DBMS. Our experiments show that
                 our framework can identify interesting visualizations
                 with high accuracy. Our optimizations lead to multiple
                 orders of magnitude speedup on relational row and
                 column stores and provide recommendations at
                 interactive time scales. Finally, we demonstrate via a
                 user study the effectiveness of our deviation-based
                 utility metric and the value of recommendations in
                 supporting visual analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qiu:2015:DLS,
  author =       "Disheng Qiu and Luciano Barbosa and Xin Luna Dong and
                 Yanyan Shen and Divesh Srivastava",
  title =        "{Dexter}: large-scale discovery and extraction of
                 product specifications on the web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "8",
  number =       "13",
  pages =        "2194--2205",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2831360.2831372",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Sep 30 17:17:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The web is a rich resource of structured data. There
                 has been an increasing interest in using web structured
                 data for many applications such as data integration,
                 web search and question answering. In this paper, we
                 present Dexter, a system to find product sites on the
                 web, and detect and extract product specifications from
                 them. Since product specifications exist in multiple
                 product sites, our focused crawler relies on search
                 queries and backlinks to discover product sites. To
                 perform the detection, and handle the high diversity of
                 specifications in terms of content, size and format,
                 our system uses supervised learning to classify HTML
                 fragments (e.g., tables and lists) present in web pages
                 as specifications or not. To perform large-scale
                 extraction of the attribute-value pairs from the HTML
                 fragments identified by the specification detector,
                 Dexter adopts two lightweight strategies: a
                 domain-independent and unsupervised wrapper method,
                 which relies on the observation that these HTML
                 fragments have very similar structure; and a
                 combination of this strategy with a previous approach,
                 which infers extraction patterns by annotations
                 generated by automatic but noisy annotators. The
                 results show that our crawler strategy to locate
                 product specification pages is effective: (1) it
                 discovered 1.46M product specification pages from
                 3,005 sites and 9 different categories; (2) the
                 specification detector obtains high values of F-measure
                 (close to 0.9) over a heterogeneous set of product
                 specifications; and (3) our efficient wrapper methods
                 for attribute-value extraction get very high values of
                 precision (0.92) and recall (0.95) and obtain better
                 results than a state-of-the-art, supervised rule-based
                 wrapper.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2015:QAL,
  author =       "Qiang Huang and Jianlin Feng and Yikai Zhang and Qiong
                 Fang and Wilfred Ng",
  title =        "Query-aware locality-sensitive hashing for approximate
                 nearest neighbor search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "1",
  pages =        "1--12",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Locality-Sensitive Hashing (LSH) and its variants are
                 the well-known indexing schemes for the $c$-Approximate
                 Nearest Neighbor ($c$-ANN) search problem in
                 high-dimensional Euclidean space. Traditionally, LSH
                 functions are constructed in a query-oblivious manner
                 in the sense that buckets are partitioned before any
                 query arrives. However, objects closer to a query may
                 be partitioned into different buckets, which is
                 undesirable. Due to the use of query-oblivious bucket
                 partition, the state-of-the-art LSH schemes for
                 external memory, namely C2LSH and LSB-Forest, only work
                 with approximation ratio of integer $ c \geq 2$. In
                 this paper, we introduce a novel concept of query-aware
                 bucket partition which uses a given query as the
                 ``anchor'' for bucket partition. Accordingly, a
                 query-aware LSH function is a random projection coupled
                 with query-aware bucket partition, which removes random
                 shift required by traditional query-oblivious LSH
                 functions. Notably, query-aware bucket partition can be
                 easily implemented so that query performance is
                 guaranteed. We propose a novel query-aware LSH scheme
                 named QALSH for $c$-ANN search over external memory.
                 Our theoretical studies show that QALSH enjoys a
                 guarantee on query quality. The use of query-aware LSH
                 function enables QALSH to work with any approximation
                 ratio $ c > 1$. Extensive experiments show that QALSH
                 outperforms C2LSH and LSB-Forest, especially in
                 high-dimensional space. Specifically, by using a ratio
                 $ c < 2$, QALSH can achieve much better query
                 quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khaouid:2015:KCD,
  author =       "Wissam Khaouid and Marina Barsky and Venkatesh
                 Srinivasan and Alex Thomo",
  title =        "{$K$}-core decomposition of large networks on a single
                 {PC}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "1",
  pages =        "13--23",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Studying the topology of a network is critical to
                 inferring underlying dynamics such as tolerance to
                 failure, group behavior and spreading patterns.
                 $k$-core decomposition is a well-established metric
                 which partitions a graph into layers from external to
                 more central vertices. In this paper we aim to explore
                 whether $k$-core decomposition of large networks can be
                 computed using a consumer-grade PC. We feature
                 implementations of the ``vertex-centric'' distributed
                 protocol introduced by Montresor, De Pellegrini and
                 Miorandi on GraphChi and Webgraph. Also, we present an
                 accurate implementation of the Batagelj and Zaversnik
                 algorithm for $k$-core decomposition in Webgraph. With
                 our implementations, we show that we can efficiently
                 handle networks of billions of edges using a single
                 consumer-level machine within reasonable time and can
                 produce excellent approximations in only a fraction of
                 the execution time. To the best of our knowledge, our
                 biggest graphs are considerably larger than the graphs
                 considered in the literature. Next, we present an
                 optimized implementation of an external-memory
                 algorithm (EMcore) by Cheng, Ke, Chu, and {\"O}zsu. We
                 show that this algorithm also performs well for large
                 datasets, however, it cannot predict whether a given
                 memory budget is sufficient for a new dataset. We
                 present a thorough analysis of all algorithms
                 concluding that it is viable to compute $k$-core
                 decomposition for large networks in a consumer-grade
                 PC.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2015:WCP,
  author =       "Zhenguo Li and Yixiang Fang and Qin Liu and Jiefeng
                 Cheng and Reynold Cheng and John C. S. Lui",
  title =        "Walking in the cloud: parallel {SimRank} at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "1",
  pages =        "24--35",
  month =        sep,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:24 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Despite its popularity, SimRank is computationally
                 costly, in both time and space. In particular, its
                 recursive nature poses a great challenge in using
                 modern distributed computing power, and also prevents
                 querying similarities individually. Existing solutions
                 suffer greatly from these practical issues. In this
                 paper, we break such dependency for maximum efficiency
                 possible. Our method consists of offline and online
                 phases. In offline phase, a length-$n$ indexing vector
                 is derived by solving a linear system in parallel. At
                 online query time, the similarities are computed
                 instantly from the index vector. Throughout, the Monte
                 Carlo method is used to maximally reduce time and
                 space. Our algorithm, called CloudWalker, is highly
                 parallelizable, with only linear time and space.
                 Remarkably, it responses to both single-pair and
                 single-source queries in constant time. CloudWalker is
                 orders of magnitude more efficient and scalable than
                 existing solutions for large-scale problems.
                 Implemented on Spark with 10 machines and tested on the
                 web-scale clue-web graph with 1 billion nodes and 43
                 billion edges, it takes 110 hours for offline indexing,
                 64 seconds for a single-pair query, and 188 seconds for
                 a single-source query. To the best of our knowledge,
                 our work is the first to report results on clue-web,
                 which is 10x larger than the largest graph ever
                 reported for SimRank computation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arocena:2015:MBE,
  author =       "Patricia C. Arocena and Boris Glavic and Giansalvatore
                 Mecca and Ren{\'e}e J. Miller and Paolo Papotti and
                 Donatello Santoro",
  title =        "Messing up with {BART}: error generation for
                 evaluating data-cleaning algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "2",
  pages =        "36--47",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of introducing errors into clean
                 databases for the purpose of benchmarking data-cleaning
                 algorithms. Our goal is to provide users with the
                 highest possible level of control over the
                 error-generation process, and at the same time develop
                 solutions that scale to large databases. We show in the
                 paper that the error-generation problem is surprisingly
                 challenging, and in fact, NP-complete. To provide a
                 scalable solution, we develop a correct and efficient
                 greedy algorithm that sacrifices completeness, but
                 succeeds under very reasonable assumptions. To scale to
                 millions of tuples, the algorithm relies on several
                 non-trivial optimizations, including a new symmetry
                 property of data quality constraints. The trade-off
                 between control and scalability is the main technical
                 contribution of the paper.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hayashi:2015:FDB,
  author =       "Takanori Hayashi and Takuya Akiba and Yuichi Yoshida",
  title =        "Fully dynamic betweenness centrality maintenance on
                 massive networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "2",
  pages =        "48--59",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Measuring the relative importance of each vertex in a
                 network is one of the most fundamental building blocks
                 in network analysis. Among several importance measures,
                 betweenness centrality, in particular, plays key roles
                 in many real applications. Considerable effort has been
                 made for developing algorithms for static settings.
                 However, real networks today are highly dynamic and are
                 evolving rapidly, and scalable dynamic methods that can
                 instantly reflect graph changes into centrality values
                 are required. In this paper, we present the first fully
                 dynamic method for managing betweenness centrality of
                 all vertices in a large dynamic network. Its main data
                 structure is the weighted hyperedge representation of
                 shortest paths called hypergraph sketch. We carefully
                 design dynamic update procedure with theoretical
                 accuracy guarantee. To accelerate updates, we further
                 propose two auxiliary data structures called two-ball
                 index and special-purpose reachability index.
                 Experimental results using real networks demonstrate
                 its high scalability and efficiency. In particular, it
                 can reflect a graph change in less than a millisecond
                 on average for a large-scale web graph with 106M
                 vertices and 3.7B edges, which is several orders of
                 magnitude larger than the limits of previous dynamic
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2015:CCC,
  author =       "Wei Lu and Wei Chen and Laks V. S. Lakshmanan",
  title =        "From competition to complementarity: comparative
                 influence diffusion and maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "2",
  pages =        "60--71",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Influence maximization is a well-studied problem that
                 asks for a small set of influential users from a social
                 network, such that by targeting them as early adopters,
                 the expected total adoption through influence cascades
                 over the network is maximized. However, almost all
                 prior work focuses on cascades of a single propagating
                 entity or purely-competitive entities. In this work, we
                 propose the Comparative Independent Cascade (Com-IC)
                 model that covers the full spectrum of entity
                 interactions from competition to complementarity. In
                 Com-IC, users' adoption decisions depend not only on
                 edge-level information propagation, but also on a
                 node-level automaton whose behavior is governed by a
                 set of model parameters, enabling our model to capture
                 not only competition, but also complementarity, to any
                 possible degree. We study two natural optimization
                 problems, Self Influence Maximization and Complementary
                 Influence Maximization, in a novel setting with
                 complementary entities. Both problems are NP-hard, and
                 we devise efficient and effective approximation
                 algorithms via non-trivial techniques based on
                 reverse-reachable sets and a novel ``sandwich
                 approximation'' strategy. The applicability of both
                 techniques extends beyond our model and problems. Our
                 experiments show that the proposed algorithms
                 consistently outperform intuitive baselines on four
                 real-world social networks, often by a significant
                 margin. In addition, we learn model parameters from
                 real user action logs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kloudas:2015:POD,
  author =       "Konstantinos Kloudas and Margarida Mamede and Nuno
                 Pregui{\c{c}}a and Rodrigo Rodrigues",
  title =        "{Pixida}: optimizing data parallel jobs in wide-area
                 data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "2",
  pages =        "72--83",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the era of global-scale services, big data
                 analytical queries are often required to process
                 datasets that span multiple data centers (DCs). In this
                 setting, cross-DC bandwidth is often the scarcest, most
                 volatile, and/or most expensive resource. However,
                 current widely deployed big data analytics frameworks
                 make no attempt to minimize the traffic traversing
                 these links. In this paper, we present Pixida, a
                 scheduler that aims to minimize data movement across
                 resource constrained links. To achieve this, we
                 introduce a new abstraction called Silo, which is key
                 to modeling Pixida's scheduling goals as a graph
                 partitioning problem. Furthermore, we show that
                 existing graph partitioning problem formulations do not
                 map to how big data jobs work, causing their solutions
                 to miss opportunities for avoiding data movement. To
                 address this, we formulate a new graph partitioning
                 problem and propose a novel algorithm to solve it. We
                 integrated Pixida in Spark and our experiments show
                 that, when compared to existing schedulers, Pixida
                 achieves a significant traffic reduction of up to $
                 \approx 9 \times $ on the aforementioned links.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2015:SOS,
  author =       "Lu Wang and Robert Christensen and Feifei Li and Ke
                 Yi",
  title =        "Spatial online sampling and aggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "84--95",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The massive adoption of smart phones and other mobile
                 devices has generated humongous amount of spatial and
                 spatio-temporal data. The importance of spatial
                 analytics and aggregation is ever-increasing. An
                 important challenge is to support interactive
                 exploration over such data. However, spatial analytics
                 and aggregation using all data points that satisfy a
                 query condition is expensive, especially over large
                 data sets, and could not meet the needs of interactive
                 exploration. To that end, we present novel indexing
                 structures that support spatial online sampling and
                 aggregation on large spatial and spatio-temporal data
                 sets. In spatial online sampling, random samples from
                 the set of spatial (or spatio-temporal) points that
                 satisfy a query condition are generated incrementally
                 in an online fashion. With more and more samples,
                 various spatial analytics and aggregations can be
                 performed in an online, interactive fashion, with
                 estimators that have better accuracy over time. Our
                 design works well for both memory-based and
                 disk-resident data sets, and scales well towards
                 different query and sample sizes. More importantly, our
                 structures are dynamic, hence, they are able to deal
                 with insertions and deletions efficiently. Extensive
                 experiments on large real data sets demonstrate the
                 improvements achieved by our indexing structures
                 compared to other baseline methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Richter:2015:SDA,
  author =       "Stefan Richter and Victor Alvarez and Jens Dittrich",
  title =        "A seven-dimensional analysis of hashing methods and
                 its implications on query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "96--107",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Hashing is a solved problem. It allows us to get
                 constant time access for lookups. Hashing is also
                 simple. It is safe to use an arbitrary method as a
                 black box and expect good performance, and
                 optimizations to hashing can only improve it by a
                 negligible delta. Why are all of the previous
                 statements plain wrong? That is what this paper is
                 about. In this paper we thoroughly study hashing for
                 integer keys and carefully analyze the most common
                 hashing methods in a five-dimensional requirements
                 space: (1) data-distribution, (2) load factor, (3)
                 dataset size, (4) read/write-ratio, and (5)
                 un/successful-ratio. Each point in that design space
                 may potentially suggest a different hashing scheme, and
                 additionally also a different hash function. We show
                 that a right or wrong decision in picking the right
                 hashing scheme and hash function combination may lead
                 to significant difference in performance. To
                 substantiate this claim, we carefully analyze two
                 additional dimensions: (6) five representative hashing
                 schemes (which includes an improved variant of Robin
                 Hood hashing), (7) four important classes of hash
                 functions widely used today. That is, we consider 20
                 different combinations in total. Finally, we also
                 provide a glimpse about the effect of table memory
                 layout and the use of SIMD instructions. Our study
                 clearly indicates that picking the right combination
                 may have considerable impact on insert and lookup
                 performance, as well as memory footprint. A major
                 conclusion of our work is that hashing should be
                 considered a white box before blindly using it in
                 applications, such as query processing. Finally, we
                 also provide a strong guideline about when to use which
                 hashing method.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arocena:2015:IIM,
  author =       "Patricia C. Arocena and Boris Glavic and Radu Ciucanu
                 and Ren{\'e}e J. Miller",
  title =        "The {iBench} integration metadata generator",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "108--119",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given the maturity of the data integration field it is
                 surprising that rigorous empirical evaluations of
                 research ideas are so scarce. We identify a major
                 roadblock for empirical work --- the lack of
                 comprehensive metadata generators that can be used to
                 create benchmarks for different integration tasks. This
                 makes it difficult to compare integration solutions,
                 understand their generality, and understand their
                 performance. We present iBench, the first metadata
                 generator that can be used to evaluate a wide-range of
                 integration tasks (data exchange, mapping creation,
                 mapping composition, schema evolution, among many
                 others). iBench permits control over the size and
                 characteristics of the metadata it generates (schemas,
                 constraints, and mappings). Our evaluation demonstrates
                 that iBench can efficiently generate very large,
                 complex, yet realistic scenarios with different
                 characteristics. We also present an evaluation of three
                 mapping creation systems using iBench and show that the
                 intricate control that iBench provides over metadata
                 scenarios can reveal new and important empirical
                 insights. iBench is an open-source, extensible tool
                 that we are providing to the community. We believe it
                 will raise the bar for empirical evaluation and
                 comparison of data integration systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Altwaijry:2015:QFI,
  author =       "Hotham Altwaijry and Sharad Mehrotra and Dmitri V.
                 Kalashnikov",
  title =        "{QuERy}: a framework for integrating entity resolution
                 with query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "120--131",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper explores an analysis-aware data cleaning
                 architecture for a large class of SPJ SQL queries. In
                 particular, we propose QuERy, a novel framework for
                 integrating entity resolution (ER) with query
                 processing. The aim of QuERy is to correctly and
                 efficiently answer complex queries issued on top of
                 dirty data. The comprehensive empirical evaluation of
                 the proposed solution demonstrates its significant
                 advantage in terms of efficiency over the traditional
                 techniques for the given problem settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2015:POM,
  author =       "Taesung Lee and Jin-woo Park and Sanghoon Lee and
                 Seung-Won Hwang and Sameh Elnikety and Yuxiong He",
  title =        "Processing and optimizing main memory spatial-keyword
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "132--143",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Important cloud services rely on spatial-keyword
                 queries, containing a spatial predicate and arbitrary
                 boolean keyword queries. In particular, we study the
                 processing of such queries in main memory to support
                 short response times. In contrast, current
                 state-of-the-art spatial-keyword indexes and relational
                 engines are designed for different assumptions. Rather
                 than building a new spatial-keyword index, we employ a
                 cost-based optimizer to process these queries using a
                 spatial index and a keyword index. We address several
                 technical challenges to achieve this goal. We introduce
                 three operators as the building blocks to construct
                 plans for main memory query processing. We then develop
                 a cost model for the operators and query plans. We
                 introduce five optimization techniques that efficiently
                 reduce the search space and produce a query plan with
                 low cost. The optimization techniques are
                 computationally efficient, and they identify a query
                 plan with a formal approximation guarantee under the
                 common independence assumption. Furthermore, we extend
                 the framework to exploit interesting orders. We
                 implement the query optimizer to empirically validate
                 our proposed approach using real-life datasets. The
                 evaluation shows that the optimizations provide
                 significant reduction in the average and tail latency
                 of query processing: 7- to 11-fold reduction over using
                 a single index in terms of 99th percentile response
                 time. In addition, this approach outperforms existing
                 spatial-keyword indexes, and DBMS query optimizers for
                 both average and high-percentile response times.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Park:2015:NSH,
  author =       "Yongjoo Park and Michael Cafarella and Barzan
                 Mozafari",
  title =        "Neighbor-sensitive hashing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "144--155",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Approximate $k$NN ($k$-nearest neighbor) techniques
                 using binary hash functions are among the most commonly
                 used approaches for overcoming the prohibitive cost of
                 performing exact $k$NN queries. However, the success
                 of these techniques largely depends on their hash
                 functions' ability to distinguish $k$NN items; that
                 is, the $k$NN items retrieved based on data items'
                 hashcodes, should include as many true $k$NN items as
                 possible. A widely-adopted principle for this process
                 is to ensure that similar items are assigned to the
                 same hashcode so that the items with the hashcodes
                 similar to a query's hashcode are likely to be true
                 neighbors. In this work, we abandon this
                 heavily-utilized principle and pursue the opposite
                 direction for generating more effective hash functions
                 for $k$NN tasks. That is, we aim to increase the
                 distance between similar items in the hashcode space,
                 instead of reducing it. Our contribution begins by
                 providing theoretical analysis on why this
                 revolutionary and seemingly counter-intuitive approach
                 leads to a more accurate identification of $k$NN
                 items. Our analysis is followed by a proposal for a
                 hashing algorithm that embeds this novel principle. Our
                 empirical studies confirm that a hashing algorithm
                 based on this counter-intuitive idea significantly
                 improves the efficiency and accuracy of
                 state-of-the-art techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2015:CMB,
  author =       "Botong Huang and Nicholas W. D. Jarrett and Shivnath
                 Babu and Sayan Mukherjee and Jun Yang",
  title =        "{C{\"u}m{\"u}l{\"o}n}: matrix-based data analytics in
                 the cloud with spot instances",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "156--167",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We describe C{\"u}m{\"u}l{\"o}n, a system aimed at
                 helping users develop and deploy matrix-based data
                 analysis programs in a public cloud. A key feature of
                 C{\"u}m{\"u}l{\"o}n is its end-to-end support for the
                 so-called spot instances---machines whose market price
                 fluctuates over time but is usually much lower than the
                 regular fixed price. A user sets a bid price when
                 acquiring spot instances, and loses them as soon as the
                 market price exceeds the bid price. While spot
                 instances can potentially save cost, they are difficult
                 to use effectively, and run the risk of not finishing
                 work while costing more. C{\"u}m{\"u}l{\"o}n provides a
                 highly elastic computation and storage engine on top of
                 spot instances, and offers automatic cost-based
                 optimization of execution, deployment, and bidding
                 strategies. C{\"u}m{\"u}l{\"o}n further quantifies how
                 the uncertainty in the market price translates into the
                 cost uncertainty of its recommendations, and allows
                 users to specify their risk tolerance as an
                 optimization constraint.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kaul:2015:NLU,
  author =       "Manohar Kaul and Raymond Chi-Wing Wong and Christian
                 S. Jensen",
  title =        "New lower and upper bounds for shortest distance
                 queries on terrains",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "168--179",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The increasing availability of massive and accurate
                 laser data enables the processing of spatial queries on
                 terrains. As shortest-path computation, an integral
                 element of query processing, is inherently expensive on
                 terrains, a key approach to enabling efficient query
                 processing is to reduce the need for exact
                 shortest-path computation in query processing. We
                 develop new lower and upper bounds on terrain shortest
                 distances that are provably tighter than any existing
                 bounds. Unlike existing bounds, the new bounds do not
                 rely on the quality of the triangulation. We show how
                 use of the new bounds speeds up query processing by
                 reducing the need for exact distance computations.
                 Speedups of nearly an order of magnitude are
                 demonstrated empirically for well-known spatial
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Freire:2015:CRR,
  author =       "Cibele Freire and Wolfgang Gatterbauer and Neil
                 Immerman and Alexandra Meliou",
  title =        "The complexity of resilience and responsibility for
                 self-join-free conjunctive queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "180--191",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Several research thrusts in the area of data
                 management have focused on understanding how changes in
                 the data affect the output of a view or standing query.
                 Example applications are explaining query results,
                 propagating updates through views, and anonymizing
                 datasets. An important aspect of this analysis is the
                 problem of deleting a minimum number of tuples from the
                 input tables to make a given Boolean query false, which
                 we refer to as ``the resilience of a query.'' In this
                 paper, we study the complexity of resilience for
                 self-join-free conjunctive queries with arbitrary
                 functional dependencies. The cornerstone of our work is
                 the novel concept of triads, a simple structural
                 property of a query that leads to the several dichotomy
                 results we show in this paper. The concepts of triads
                 and resilience bridge the connections between the
                 problems of deletion propagation and causal
                 responsibility, and allow us to substantially advance
                 the known complexity results in these topics.
                 Specifically, we show a dichotomy for the complexity of
                 resilience, which identifies previously unknown
                 tractable families for deletion propagation with source
                 side-effects, and we extend this result to account for
                 functional dependencies. Further, we identify a mistake
                 in a previous dichotomy for causal responsibility, and
                 offer a revised characterization based purely on the
                 structural form of the query (presence or absence of
                 triads). Finally, we extend the dichotomy for causal
                 responsibility in two ways: (a) we account for
                 functional dependencies in the input tables, and (b) we
                 compute responsibility for sets of tuples specified via
                 wildcards.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2015:SAD,
  author =       "Hao Huang and Shiva Prasad Kasiviswanathan",
  title =        "Streaming anomaly detection using randomized matrix
                 sketching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "192--203",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data is continuously being generated from sources such
                 as machines, network traffic, application logs, etc.
                 Timely and accurate detection of anomalies in massive
                 data streams has important applications such as in
                 preventing machine failures, intrusion detection, and
                 dynamic load balancing. In this paper, we introduce a
                 novel (unsupervised) anomaly detection framework which
                 can be used to detect anomalies in a streaming fashion
                 by making only one pass over the data while utilizing
                 limited storage. We adapt ideas from matrix sketching
                 to maintain, in a streaming model, a set of few
                 orthogonal vectors that form a good approximate basis
                 for all the observed data. Using this constructed
                 orthogonal basis, anomalies in new incoming data are
                 detected based on a simple reconstruction error test.
                 We theoretically prove that our algorithm compares
                 favorably with an offline approach based on expensive
                 global singular value decomposition (SVD) updates.
                 Additionally, we apply ideas from randomized low-rank
                 matrix approximations to further speedup the algorithm.
                 The experimental results show the effectiveness and
                 efficiency of our approach over other popular scalable
                 anomaly detection approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Leis:2015:HGQ,
  author =       "Viktor Leis and Andrey Gubichev and Atanas Mirchev and
                 Peter Boncz and Alfons Kemper and Thomas Neumann",
  title =        "How good are query optimizers, really?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "204--215",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding a good join order is crucial for query
                 performance. In this paper, we introduce the Join Order
                 Benchmark (JOB) and experimentally revisit the main
                 components in the classic query optimizer architecture
                 using a complex, real-world data set and realistic
                 multi-join queries. We investigate the quality of
                 industrial-strength cardinality estimators and find
                 that all estimators routinely produce large errors. We
                 further show that while estimates are essential for
                 finding a good join order, query performance is
                 unsatisfactory if the query engine relies too heavily
                 on these estimates. Using another set of experiments
                 that measure the impact of the cost model, we find that
                 it has much less influence on query performance than
                 the cardinality estimates. Finally, we investigate plan
                 enumeration techniques comparing exhaustive dynamic
                 programming with heuristic algorithms and find that
                 exhaustive enumeration improves performance despite the
                 sub-optimal cardinality estimates.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Interlandi:2015:TDP,
  author =       "Matteo Interlandi and Kshitij Shah and Sai Deep Tetali
                 and Muhammad Ali Gulzar and Seunghyun Yoo and Miryung
                 Kim and Todd Millstein and Tyson Condie",
  title =        "{Titian}: data provenance support in {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "3",
  pages =        "216--227",
  month =        nov,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 2 14:26:50 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Debugging data processing logic in Data-Intensive
                 Scalable Computing (DISC) systems is a difficult and
                 time consuming effort. Today's DISC systems offer very
                 little tooling for debugging programs, and as a result
                 programmers spend countless hours collecting evidence
                 (e.g., from log files) and performing trial and error
                 debugging. To aid this effort, we built Titian, a
                 library that enables data provenance---tracking data
                 through transformations---in Apache Spark. Data
                 scientists using the Titian Spark extension will be
                 able to quickly identify the input data at the root
                 cause of a potential bug or outlier result. Titian is
                 built directly into the Spark platform and offers data
                 provenance support at interactive
                 speeds---orders-of-magnitude faster than alternative
                 solutions---while minimally impacting Spark job
                 performance; observed overheads for capturing data
                 lineage rarely exceed 30\% above the baseline job
                 execution time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rodiger:2015:HSQ,
  author =       "Wolf R{\"o}diger and Tobias M{\"u}hlbauer and Alfons
                 Kemper and Thomas Neumann",
  title =        "High-speed query processing over high-speed networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "228--239",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern database clusters entail two levels of
                 networks: connecting CPUs and NUMA regions inside a
                 single server in the small and multiple servers in the
                 large. The huge performance gap between these two types
                 of networks used to slow down distributed query
                 processing to such an extent that a cluster of machines
                 actually performed worse than a single many-core
                 server. The increased main-memory capacity of the
                 cluster remained the sole benefit of such a scale-out.
                 The economic viability of high-speed interconnects such
                 as InfiniBand has narrowed this performance gap
                 considerably. However, InfiniBand's higher network
                 bandwidth alone does not improve query performance as
                 expected when the distributed query engine is left
                 unchanged. The scalability of distributed query
                 processing is impaired by TCP overheads, switch
                 contention due to uncoordinated communication, and load
                 imbalances resulting from the inflexibility of the
                 classic exchange operator model. This paper presents
                 the blueprint for a distributed query engine that
                 addresses these problems by considering both levels of
                 networks holistically. It consists of two parts: First,
                 hybrid parallelism that distinguishes local and
                 distributed parallelism for better scalability in both
                 the number of cores as well as servers. Second, a novel
                 communication multiplexer tailored for analytical
                 database workloads using remote direct memory access
                 (RDMA) and low-latency network scheduling for
                 high-speed communication with almost no CPU overhead.
                 An extensive evaluation within the HyPer database
                 system using the TPC-H benchmark shows that our
                 holistic approach indeed enables high-speed query
                 processing over high-speed networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zong:2015:BQD,
  author =       "Bo Zong and Xusheng Xiao and Zhichun Li and Zhenyu Wu
                 and Zhiyun Qian and Xifeng Yan and Ambuj K. Singh and
                 Guofei Jiang",
  title =        "Behavior query discovery in system-generated temporal
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "240--251",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Computer system monitoring generates huge amounts of
                 logs that record the interaction of system entities.
                 How to query such data to better understand system
                 behaviors and identify potential system risks and
                 malicious behaviors becomes a challenging task for
                 system administrators due to the dynamics and
                 heterogeneity of the data. System monitoring data are
                 essentially heterogeneous temporal graphs with nodes
                 being system entities and edges being their
                 interactions over time. Given the complexity of such
                 graphs, it becomes time-consuming for system
                 administrators to manually formulate useful queries in
                 order to examine abnormal activities, attacks, and
                 vulnerabilities in computer systems. In this work, we
                 investigate how to query temporal graphs and treat
                 query formulation as a discriminative temporal graph
                 pattern mining problem. We introduce TGMiner to mine
                 discriminative patterns from system logs, and these
                 patterns can be taken as templates for building more
                 complex queries. TGMiner leverages temporal information
                 in graphs to prune graph patterns that share similar
                 growth trend without compromising pattern quality.
                 Experimental results on real system data show that
                 TGMiner is 6--32 times faster than baseline methods. The
                 discovered patterns were verified by system experts;
                 they achieved high precision (97\%) and recall
                 (91\%).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kocberber:2015:AMA,
  author =       "Onur Kocberber and Babak Falsafi and Boris Grot",
  title =        "Asynchronous memory access chaining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "252--263",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In-memory databases rely on pointer-intensive data
                 structures to quickly locate data in memory. A single
                 lookup operation in such data structures often exhibits
                 long-latency memory stalls due to dependent pointer
                 dereferences. Hiding the memory latency by launching
                 additional memory accesses for other lookups is an
                 effective way of improving performance of
                 pointer-chasing codes (e.g., hash table probes, tree
                 traversals). The ability to exploit such inter-lookup
                 parallelism is beyond the reach of modern out-of-order
                 cores due to the limited size of their instruction
                 window. Instead, recent work has proposed software
                 prefetching techniques that exploit inter-lookup
                 parallelism by arranging a set of independent lookups
                 into a group or a pipeline, and navigate their
                 respective pointer chains in a synchronized fashion.
                 While these techniques work well for highly regular
                 access patterns, they break down in the face of
                 irregularity across lookups. Such irregularity includes
                 variable-length pointer chains, early exit, and
                 read/write dependencies. This work introduces
                 Asynchronous Memory Access Chaining (AMAC), a new
                 approach for exploiting inter-lookup parallelism to
                 hide the memory access latency. AMAC achieves high
                 dynamism in dealing with irregularity across lookups by
                 maintaining the state of each lookup separately from
                 that of other lookups. This feature enables AMAC to
                 initiate a new lookup as soon as any of the in-flight
                 lookups complete. In contrast, the static arrangement
                 of lookups into a group or pipeline in existing
                 techniques precludes such adaptivity. Our results show
                 that AMAC matches or outperforms state-of-the-art
                 prefetching techniques on regular access patterns,
                 while delivering up to 2.3x higher performance under
                 irregular data structure lookups. AMAC fully utilizes
                 the available microarchitectural resources, generating
                 the maximum number of memory accesses allowed by
                 hardware in both single- and multi-threaded execution
                 modes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Haney:2015:DPA,
  author =       "Samuel Haney and Ashwin Machanavajjhala and Bolin
                 Ding",
  title =        "Design of policy-aware differentially private
                 algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "264--275",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of designing error optimal differentially
                 private algorithms is well studied. Recent work
                 applying differential privacy to real world settings
                 have used variants of differential privacy that
                 appropriately modify the notion of neighboring
                 databases. The problem of designing error optimal
                 algorithms for such variants of differential privacy is
                 open. In this paper, we show a novel transformational
                 equivalence result that can turn the problem of query
                 answering under differential privacy with a modified
                 notion of neighbors to one of query answering under
                 standard differential privacy, for a large class of
                 neighbor definitions. We utilize the Blowfish privacy
                 framework that generalizes differential privacy.
                 Blowfish uses a policy graph to instantiate different
                  notions of neighboring databases. We show that the
                  error incurred when answering a workload $W$ on a
                  database $x$ under a Blowfish policy graph $G$ is
                  identical to the error required to answer a
                  transformed workload $f_G(W)$ on database $g_G(x)$
                  under standard differential privacy, where $f_G$ and
                  $g_G$ are linear transformations based on $G$. Using
                  this result, we
                 develop error efficient algorithms for releasing
                 histograms and multidimensional range queries under
                 different Blowfish policies. We believe the tools we
                 develop will be useful for finding mechanisms to answer
                 many other classes of queries with low error under
                 other policy graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2015:ACC,
  author =       "Xin Huang and Laks V. S. Lakshmanan and Jeffrey Xu Yu
                 and Hong Cheng",
  title =        "Approximate closest community search in networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "276--287",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, there has been significant interest in the
                 study of the community search problem in social and
                 information networks: given one or more query nodes,
                 find densely connected communities containing the query
                 nodes. However, most existing studies do not address
                 the ``free rider'' issue, that is, nodes far away from
                 query nodes and irrelevant to them are included in the
                 detected community. Some state-of-the-art models have
                 attempted to address this issue, but not only are their
                 formulated problems NP-hard, they do not admit any
                 approximations without restrictive assumptions, which
                 may not always hold in practice. In this paper, given
                  an undirected graph $G$ and a set of query nodes $Q$,
                  we study community search using the $k$-truss based
                  community model. We formulate our problem of finding a
                  closest truss community (CTC), as finding a connected
                  $k$-truss subgraph with the largest $k$ that contains
                  $Q$, and has the minimum diameter among such
                  subgraphs. We prove this problem is NP-hard.
                  Furthermore, it is NP-hard to approximate the problem
                  within a factor $ (2 - \epsilon) $, for any $ \epsilon
                  > 0 $. However, we develop a greedy algorithmic
                  framework, which first finds a CTC containing $Q$, and
                  then iteratively removes the furthest nodes from $Q$,
                  from the graph. The method achieves 2-approximation to
                  the optimal solution. To further improve the
                  efficiency, we make use of a compact truss index and
                  develop efficient algorithms for $k$-truss
                  identification and maintenance as nodes
                 get eliminated. In addition, using bulk deletion
                 optimization and local exploration strategies, we
                 propose two more efficient algorithms. One of them
                 trades some approximation quality for efficiency while
                 the other is a very efficient heuristic. Extensive
                 experiments on 6 real-world networks show the
                 effectiveness and efficiency of our community model and
                 search algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Andre:2015:CLE,
  author =       "Fabien Andr{\'e} and Anne-Marie Kermarrec and Nicolas
                 {Le Scouarnec}",
  title =        "Cache locality is not enough: high-performance nearest
                 neighbor search with product quantization fast scan",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "288--299",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nearest Neighbor (NN) search in high dimension is an
                 important feature in many applications (e.g., image
                 retrieval, multimedia databases). Product Quantization
                 (PQ) is a widely used solution which offers high
                 performance, i.e., low response time while preserving a
                 high accuracy. PQ represents high-dimensional vectors
                 (e.g., image descriptors) by compact codes. Hence, very
                 large databases can be stored in memory, allowing NN
                 queries without resorting to slow I/O operations. PQ
                 computes distances to neighbors using cache-resident
                 lookup tables, thus its performance remains limited by
                 (i) the many cache accesses that the algorithm
                 requires, and (ii) its inability to leverage SIMD
                 instructions available on modern CPUs. In this paper,
                 we advocate that cache locality is not sufficient for
                 efficiency. To address these limitations, we design a
                 novel algorithm, PQ Fast Scan, that transforms the
                 cache-resident lookup tables into small tables, sized
                 to fit SIMD registers. This transformation allows (i)
                 in-register lookups in place of cache accesses and (ii)
                 an efficient SIMD implementation. PQ Fast Scan has the
                 exact same accuracy as PQ, while having 4 to 6 times
                 lower response time (e.g., for 25 million vectors, scan
                 time is reduced from 74ms to 13ms).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Prokoshyna:2015:CQL,
  author =       "Nataliya Prokoshyna and Jaros{\l}aw Szlichta and Fei
                 Chiang and Ren{\'e}e J. Miller and Divesh Srivastava",
  title =        "Combining quantitative and logical data cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "300--311",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Quantitative data cleaning relies on the use of
                 statistical methods to identify and repair data quality
                 problems while logical data cleaning tackles the same
                 problems using various forms of logical reasoning over
                 declarative dependencies. Each of these approaches has
                 its strengths: the logical approach is able to capture
                 subtle data quality problems using sophisticated
                 dependencies, while the quantitative approach excels at
                 ensuring that the repaired data has desired statistical
                 properties. We propose a novel framework within which
                 these two approaches can be used synergistically to
                 combine their respective strengths. We instantiate our
                 framework using (i) metric functional dependencies, a
                 type of dependency that generalizes functional
                 dependencies (FDs) to identify inconsistencies in
                 domains where only large differences in metric data are
                 considered to be a data quality problem, and (ii)
                 repairs that modify the inconsistent data so as to
                 minimize statistical distortion, measured using the
                 Earth Mover's Distance. We show that the problem of
                 computing a statistical distortion minimal repair is
                 NP-hard. Given this complexity, we present an efficient
                 algorithm for finding a minimal repair that has a small
                 statistical distortion using EMD computation over
                 semantically related attributes. To identify
                 semantically related attributes, we present a sound and
                 complete axiomatization and an efficient algorithm for
                 testing implication of metric FDs. While the complexity
                 of inference for some other FD extensions is co-NP
                 complete, we show that the inference problem for metric
                 FDs remains linear, as in traditional FDs. We prove
                 that every instance that can be generated by our repair
                 algorithm is set-minimal (with no unnecessary changes).
                 Our experimental evaluation demonstrates that our
                 techniques obtain a considerably lower statistical
                 distortion than existing repair techniques, while
                 achieving similar levels of efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papadakis:2015:SAV,
  author =       "George Papadakis and George Alexiou and George
                 Papastefanatos and Georgia Koutrika",
  title =        "Schema-agnostic vs schema-based configurations for
                 blocking methods on homogeneous data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "312--323",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity Resolution constitutes a core task for data
                 integration that, due to its quadratic complexity,
                 typically scales to large datasets through blocking
                 methods. These can be configured in two ways. The
                 schema-based configuration relies on schema information
                 in order to select signatures of high distinctiveness
                 and low noise, while the schema-agnostic one treats
                 every token from all attribute values as a signature.
                 The latter approach has significant potential, as it
                 requires no fine-tuning by human experts and it applies
                 to heterogeneous data. Yet, there is no systematic
                 study on its relative performance with respect to the
                 schema-based configuration. This work covers this gap
                 by comparing analytically the two configurations in
                 terms of effectiveness, time efficiency and
                 scalability. We apply them to 9 established blocking
                 methods and to 11 benchmarks of structured data. We
                 provide valuable insights into the internal
                 functionality of the blocking methods with the help of
                 a novel taxonomy. Our studies reveal that the
                 schema-agnostic configuration offers unsupervised and
                 robust definition of blocking keys under versatile
                 settings, trading a higher computational cost for a
                 consistently higher recall than the schema-based one.
                 It also enables the use of state-of-the-art blocking
                 methods without schema knowledge.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Epasto:2015:ENC,
  author =       "Alessandro Epasto and Silvio Lattanzi and Vahab
                 Mirrokni and Ismail Oner Sebe and Ahmed Taei and Sunita
                 Verma",
  title =        "Ego-net community mining applied to friend
                 suggestion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "324--335",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we present a study of the community
                 structure of ego-networks---the graphs representing the
                 connections among the neighbors of a node---for several
                 online social networks. Toward this goal, we design a
                 new technique to efficiently build and cluster all the
                 ego-nets of a graph in parallel (note that even just
                 building the ego-nets efficiently is challenging on
                 large networks). Our experimental findings are quite
                 compelling: at a microscopic level it is easy to detect
                 high quality communities. Leveraging on this fact we,
                 then, develop new features for friend suggestion based
                 on co-occurrences of two nodes in different ego-nets'
                 communities. Our new features can be computed
                 efficiently on very large scale graphs by just
                 analyzing the neighborhood of each node. Furthermore,
                 we prove formally on a stylized model, and by
                 experimental analysis that this new similarity measure
                 outperforms the classic local features employed for
                 friend suggestions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abedjan:2015:TRD,
  author =       "Ziawasch Abedjan and Cuneyt G. Akcora and Mourad
                 Ouzzani and Paolo Papotti and Michael Stonebraker",
  title =        "Temporal rules discovery for web data cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "336--347",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Declarative rules, such as functional dependencies,
                 are widely used for cleaning data. Several systems take
                 them as input for detecting errors and computing a
                 ``clean'' version of the data. To support domain
                  experts in specifying these rules, several tools have
                 been proposed to profile the data and mine rules.
                 However, existing discovery techniques have
                 traditionally ignored the time dimension. Recurrent
                 events, such as persons reported in locations, have a
                 duration in which they are valid, and this duration
                 should be part of the rules or the cleaning process
                 would simply fail. In this work, we study the rule
                 discovery problem for temporal web data. Such a
                 discovery process is challenging because of the nature
                 of web data; extracted facts are (i) sparse over time,
                 (ii) reported with delays, and (iii) often reported
                 with errors over the values because of inaccurate
                  sources or non-robust extractors. We handle these
                 challenges with a new discovery approach that is more
                 robust to noise. Our solution uses machine learning
                 methods, such as association measures and outlier
                 detection, for the discovery of the rules, together
                 with an aggressive repair of the data in the mining
                 step itself. Our experimental evaluation over
                 real-world data from Recorded Future, an intelligence
                 company that monitors over 700K Web sources, shows that
                 temporal rules improve the quality of the data with an
                 increase of the average precision in the cleaning
                 process from 0.37 to 0.84, and a 40\% relative increase
                 in the average F-measure.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Roy:2015:EQA,
  author =       "Sudeepa Roy and Laurel Orr and Dan Suciu",
  title =        "Explaining query answers with explanation-ready
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "348--359",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increased generation and availability of big
                 data in different domains, there is an imminent
                 requirement for data analysis tools that are able to
                  ``explain'' the trends and anomalies obtained from this
                 data to a range of users with different backgrounds.
                 Wu-Madden (PVLDB 2013) and Roy-Suciu (SIGMOD 2014)
                 recently proposed solutions that can explain
                 interesting or unexpected answers to simple aggregate
                 queries in terms of predicates on attributes. In this
                 paper, we propose a generic framework that can support
                 much richer, insightful explanations by preparing the
                 database offline, so that top explanations can be found
                 interactively at query time. The main idea in such
                 explanation-ready databases is to pre-compute the
                  effects of potential explanations (called
                  interventions), and efficiently re-evaluate the
                  original query
                 taking into account these effects. We formalize this
                 notion and define an explanation-query that can
                 evaluate all possible explanations simultaneously
                 without having to run an iterative process, develop
                 algorithms and optimizations, and evaluate our approach
                 with experiments on real data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deng:2015:EPB,
  author =       "Dong Deng and Guoliang Li and He Wen and Jianhua
                 Feng",
  title =        "An efficient partition based method for exact set
                 similarity joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "360--371",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the exact set similarity join problem, which,
                 given two collections of sets, finds out all the
                 similar set pairs from the collections. Existing
                 methods generally utilize the prefix filter based
                 framework. They generate a prefix for each set and
                 prune all the pairs whose prefixes are disjoint.
                 However the pruning power is limited, because if two
                 dissimilar sets share a common element in their
                 prefixes, they cannot be pruned. To address this
                 problem, we propose a partition-based framework. We
                 design a partition scheme to partition the sets into
                 several subsets and guarantee that two sets are similar
                 only if they share a common subset. To improve the
                 pruning power, we propose a mixture of the subsets and
                 their 1-deletion neighborhoods (the subset of a set by
                 eliminating one element). As there are multiple
                 allocation strategies to generate the mixture, we
                 evaluate different allocations and design a
                 dynamic-programming algorithm to select the optimal
                 one. However the time complexity of generating the
                 optimal one is $ O(s^3) $ for a set with size $s$. To
                 speed up the allocation selection, we develop a greedy
                 algorithm with an approximation ratio of 2. To further
                 reduce the complexity, we design an adaptive grouping
                 mechanism, and the two techniques can reduce the
                 complexity to $ O(s \log s)$. Experimental results on
                 three real-world datasets show our method achieves high
                 performance and outperforms state-of-the-art methods by
                  2--5 times.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Haas:2015:CSC,
  author =       "Daniel Haas and Jiannan Wang and Eugene Wu and Michael
                 J. Franklin",
  title =        "{CLAMShell}: speeding up crowds for low-latency data
                 labeling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "4",
  pages =        "372--383",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Dec 19 17:42:25 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data labeling is a necessary but often slow process
                 that impedes the development of interactive systems for
                 modern data analysis. Despite rising demand for manual
                 data labeling, there is a surprising lack of work
                 addressing its high and unpredictable latency. In this
                 paper, we introduce CLAMShell, a system that speeds up
                 crowds in order to achieve consistently low-latency
                 data labeling. We offer a taxonomy of the sources of
                 labeling latency and study several large crowd-sourced
                 labeling deployments to understand their empirical
                 latency profiles. Driven by these insights, we
                 comprehensively tackle each source of latency, both by
                 developing novel techniques such as straggler
                 mitigation and pool maintenance and by optimizing
                 existing methods such as crowd retainer pools and
                 active learning. We evaluate CLAMShell in simulation
                 and on live workers on Amazon's Mechanical Turk,
                 demonstrating that our techniques can provide an order
                 of magnitude speedup and variance reduction over
                 existing crowdsourced labeling strategies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Firmani:2016:OER,
  author =       "Donatella Firmani and Barna Saha and Divesh
                 Srivastava",
  title =        "Online entity resolution using an oracle",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "5",
  pages =        "384--395",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Jan 11 17:54:24 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity resolution (ER) is the task of identifying all
                 records in a database that refer to the same underlying
                 entity. This is an expensive task, and can take a
                 significant amount of money and time; the end-user may
                 want to take decisions during the process, rather than
                 waiting for the task to be completed. We formalize an
                 online version of the entity resolution task, and use
                 an oracle which correctly labels matching and
                 non-matching pairs through queries. In this setting, we
                 design algorithms that seek to maximize progressive
                 recall, and develop a novel analysis framework for
                 prior proposals on entity resolution with an oracle,
                 beyond their worst case guarantees. Finally, we provide
                 both theoretical and experimental analysis of the
                 proposed algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Calautti:2016:EEG,
  author =       "Marco Calautti and Sergio Greco and Cristian Molinaro
                 and Irina Trubitsyna",
  title =        "Exploiting equality generating dependencies in
                 checking chase termination",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "5",
  pages =        "396--407",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Jan 11 17:54:24 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The chase is a well-known algorithm with a wide range
                 of applications in data exchange, data cleaning, data
                 integration, query optimization, and ontological
                 reasoning. Since the chase evaluation might not
                 terminate and it is undecidable whether it terminates,
                 the problem of defining (decidable) sufficient
                 conditions ensuring termination has received a great
                 deal of interest in recent years. In this regard,
                 several termination criteria have been proposed. One of
                 the main weaknesses of current approaches is the
                 limited analysis they perform on equality generating
                 dependencies (EGDs). In this paper, we propose
                 sufficient conditions ensuring that a set of
                 dependencies has at least one terminating chase
                 sequence. We propose novel criteria which are able to
                 perform a more accurate analysis of EGDs. Specifically,
                 we propose a new stratification criterion and an
                 adornment algorithm. The latter can both be used as a
                 termination criterion and be combined with current
                 techniques to make them more effective, in that
                 strictly more sets of dependencies are identified. Our
                 techniques identify sets of dependencies that are not
                 recognized by any of the current criteria.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2016:SBF,
  author =       "Tong Yang and Alex X. Liu and Muhammad Shahzad and
                 Yuankun Zhong and Qiaobin Fu and Zi Li and Gaogang Xie
                 and Xiaoming Li",
  title =        "A shifting {Bloom} filter framework for set queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "5",
  pages =        "408--419",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Jan 11 17:54:24 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Set queries are fundamental operations in computer
                 systems and applications. This paper addresses the
                 fundamental problem of designing a probabilistic data
                 structure that can quickly process set queries using a
                 small amount of memory. We propose a Shifting Bloom
                 Filter (ShBF) framework for representing and querying
                 sets. We demonstrate the effectiveness of ShBF using
                 three types of popular set queries: membership,
                 association, and multiplicity queries. The key novelty
                 of ShBF is on encoding the auxiliary information of a
                 set element in a location offset. In contrast, prior BF
                 based set data structures allocate additional memory to
                 store auxiliary information. We conducted experiments
                 using real-world network traces, and results show that
                 ShBF significantly advances the state-of-the-art on all
                 three types of set queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2016:HTM,
  author =       "Fan Yang and Jinfeng Li and James Cheng",
  title =        "{Husky}: towards a more efficient and expressive
                 distributed computing framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "5",
  pages =        "420--431",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Jan 11 17:54:24 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding efficient, expressive and yet intuitive
                 programming models for data-parallel computing system
                 is an important and open problem. Systems like Hadoop
                 and Spark have been widely adopted for massive data
                 processing, as coarse-grained primitives like map and
                 reduce are succinct and easy to master. However,
                 sometimes over-simplified API hinders programmers from
                 more fine-grained control and designing more efficient
                 algorithms. Developers may have to resort to
                 sophisticated domain-specific languages (DSLs), or even
                 low-level layers like MPI, but this raises development
                 cost---learning many mutually exclusive systems
                 prolongs the development schedule, and the use of
                 low-level tools may result in bug-prone programming.
                 This motivated us to start the Husky open-source
                 project, which is an attempt to strike a better balance
                 between high performance and low development cost.
                 Husky is developed mainly for in-memory large scale
                 data mining, and also serves as a general research
                 platform for designing efficient distributed
                 algorithms. We show that many existing frameworks can
                 be easily implemented and bridged together inside
                 Husky, and Husky is able to achieve similar or even
                 better performance compared with domain-specific
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2016:RDT,
  author =       "Zeyu Li and Hongzhi Wang and Wei Shao and Jianzhong Li
                 and Hong Gao",
  title =        "Repairing data through regular expressions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "5",
  pages =        "432--443",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Jan 11 17:54:24 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Since regular expressions are often used to detect
                 errors in sequences such as strings or date, it is
                 natural to use them for data repair. Motivated by this,
                 we propose a data repair method based on regular
                 expression to make the input sequence data obey the
                 given regular expression with minimal revision cost.
                 The proposed method contains two steps, sequence repair
                 and token value repair. For sequence repair, we propose
                 the Regular-expression-based Structural Repair (RSR in
                 short) algorithm. RSR algorithm is a dynamic
                 programming algorithm that utilizes Nondeterministic
                 Finite Automata (NFA) to calculate the edit distance
                 between a prefix of the input string and a partial
                 pattern regular expression with time complexity of $ O
                 (n m^2) $ and space complexity of $ O(m n) $ where $m$
                 is the edge number of NFA and $n$ is the input string
                 length. We also develop an optimization strategy to
                 achieve higher performance for long strings. For token
                 value repair, we combine the edit-distance-based method
                 and associate rules by a unified argument for the
                 selection of the proper method. Experimental results on
                 both real and synthetic data show that the proposed
                 method could repair the data effectively and
                 efficiently.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yan:2016:LLC,
  author =       "Cong Yan and Alvin Cheung",
  title =        "Leveraging lock contention to improve {OLTP}
                 application performance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "5",
  pages =        "444--455",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Jan 11 17:54:24 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Locking is one of the predominant costs in transaction
                 processing. While much work has focused on designing
                 efficient concurrency control mechanisms, not much has
                 been done on understanding how transaction applications
                 issue queries and leveraging application semantics to
                  improve application performance. This paper presents
                  Quro, a query-aware compiler that automatically
                  reorders queries in transaction code to improve
                  performance.
                 Observing that certain queries within a transaction are
                 more contentious than others as they require locking
                 the same tuples as other concurrently executing
                 transactions, Quro automatically changes the
                 application such that contentious queries are issued as
                 late as possible. We have evaluated Quro on various
                 transaction benchmarks, and our results show that
                 Quro-generated implementations can increase transaction
                  throughput by up to 6.53x, while reducing transaction
                 latency by up to 85\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Choudhury:2016:MBR,
  author =       "Farhana M. Choudhury and J. Shane Culpepper and Timos
                 Sellis and Xin Cao",
  title =        "Maximizing bichromatic reverse spatial and textual $k$
                 nearest neighbor queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "6",
  pages =        "456--467",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:09:59 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of maximizing bichromatic reverse $k$
                 nearest neighbor queries (BR $k$ NN) has been
                 extensively studied in spatial databases. In this work,
                 we present a related query for spatial-textual
                 databases that finds an optimal location, and a set of
                 keywords that maximizes the size of bichromatic reverse
                 spatial textual $k$ nearest neighbors (MaxBRST $k$ NN).
                 Such a query has many practical applications including
                 social media advertisements where a limited number of
                 relevant advertisements are displayed to each user. The
                 problem is to find the location and the text contents
                 to include in an advertisement so that it will be
                 displayed to the maximum number of users. The
                 increasing availability of spatial-textual collections
                 allows us to answer these queries for both spatial
                 proximity and textual similarity. This paper is the
                 first to consider the MaxBRST $k$ NN query. We show
                 that the problem is NP-hard and present both
                 approximate and exact solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Subercaze:2016:IFM,
  author =       "Julien Subercaze and Christophe Gravier and Jules
                 Chevalier and Frederique Laforest",
  title =        "{Inferray}: fast in-memory {RDF} inference",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "6",
  pages =        "468--479",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:09:59 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The advent of semantic data on the Web requires
                 efficient reasoning systems to infer RDF and OWL data.
                 The linked nature and the huge volume of data entail
                 efficiency and scalability challenges when designing
                 productive inference systems. This paper presents
                 Inferray, an implementation of RDFS, $ \rho $ df, and
                 RDFS-Plus inference with improved performance over
                 existing solutions. The main features of Inferray are
                 (1) a storage layout based on vertical partitioning
                 that guarantees sequential access and efficient
                 sort-merge join inference; (2) efficient sorting of
                 pairs of 64-bit integers using ad-hoc optimizations on
                 MSD radix and a custom counting sort; (3) a dedicated
                 temporary storage to perform efficient graph closure
                 computation. Our measurements on synthetic and
                 real-world datasets show improvements over competitors
                 on RDFS-Plus, and up to several orders of magnitude for
                 transitivity closure.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Makreshanski:2016:MES,
  author =       "Darko Makreshanski and Georgios Giannikis and Gustavo
                 Alonso and Donald Kossmann",
  title =        "{MQJoin}: efficient shared execution of main-memory
                 joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "6",
  pages =        "480--491",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:09:59 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database architectures typically process queries
                 one-at-a-time, executing concurrent queries in
                 independent execution contexts. Often, such a design
                 leads to unpredictable performance and poor
                 scalability. One approach to circumvent the problem is
                 to take advantage of sharing opportunities across
                 concurrently running queries. In this paper we propose
                 Many-Query Join (MQJoin), a novel method for sharing
                 the execution of a join that can efficiently deal with
                 hundreds of concurrent queries. This is achieved by
                 minimizing redundant work and making efficient use of
                 main-memory bandwidth and multi-core architectures.
                 Compared to existing proposals, MQJoin is able to
                 efficiently handle larger workloads regardless of the
                 schema by exploiting more sharing opportunities. We
                 also compared MQJoin to two commercial main-memory
                 column-store databases. For a TPC-H based workload, we
                 show that MQJoin provides 2--5x higher throughput with
                 significantly more stable response times.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abeywickrama:2016:NNR,
  author =       "Tenindra Abeywickrama and Muhammad Aamir Cheema and
                 David Taniar",
  title =        "$k$-nearest neighbors on road networks: a journey in
                 experimentation and in-memory implementation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "6",
  pages =        "492--503",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:09:59 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A $k$ nearest neighbor ($k$ NN) query on road networks
                 retrieves the $k$ closest points of interest (POIs) by
                 their network distances from a given location. Today,
                 in the era of ubiquitous mobile computing, this is a
                 highly pertinent query. While Euclidean distance has
                 been used as a heuristic to search for the closest POIs
                 by their road network distance, its efficacy has not
                 been thoroughly investigated. The most recent methods
                 have shown significant improvement in query
                 performance. Earlier studies, which proposed disk-based
                 indexes, were compared to the current state-of-the-art
                 in main memory. However, recent studies have shown that
                 main memory comparisons can be challenging and require
                 careful adaptation. This paper presents an extensive
                 experimental investigation in main memory to settle
                 these and several other issues. We use efficient and
                 fair memory-resident implementations of each method to
                 reproduce past experiments and conduct additional
                 comparisons for several overlooked evaluations. Notably
                 we revisit a previously discarded technique (IER)
                 showing that, through a simple improvement, it is often
                 the best performing technique.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yuan:2016:BRF,
  author =       "Yuan Yuan and Kaibo Wang and Rubao Lee and Xiaoning
                 Ding and Jing Xing and Spyros Blanas and Xiaodong
                 Zhang",
  title =        "{BCC}: reducing false aborts in optimistic concurrency
                 control with low cost for in-memory databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "6",
  pages =        "504--515",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:09:59 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Optimistic Concurrency Control (OCC) method has
                 been commonly used for in-memory databases to ensure
                 transaction serializability --- a transaction will be
                 aborted if its read set has been changed during
                 execution. This simple criterion to abort transactions
                 causes a large proportion of false positives, leading
                 to excessive transaction aborts. Transactions aborted
                 false-positively (i.e. false aborts) waste system
                 resources and can significantly degrade system
                 throughput (as much as 3.68x based on our experiments)
                 when data contention is intensive. Modern in-memory
                 databases run on systems with increasingly parallel
                 hardware and handle workloads with growing concurrency.
                 They must efficiently deal with data contention in the
                 presence of greater concurrency by minimizing false
                 aborts. This paper presents a new concurrency control
                 method named Balanced Concurrency Control (BCC) which
                 aborts transactions more carefully than OCC does. BCC
                 detects data dependency patterns which can more
                 reliably indicate unserializable transactions than the
                 criterion used in OCC. The paper studies the design
                 options and implementation techniques that can
                 effectively detect data contention by identifying
                 dependency patterns with low overhead. To test the
                 performance of BCC, we have implemented it in Silo and
                 compared its performance against that of the vanilla
                 Silo system with OCC and two-phase locking (2PL). Our
                 extensive experiments with TPC-W-like, TPC-C-like and
                 YCSB workloads demonstrate that when data contention is
                 intensive, BCC can increase transaction throughput by
                 more than 3x versus OCC and more than 2x versus 2PL;
                 meanwhile, BCC has comparable performance with OCC for
                 workloads with low data contention.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yuan:2016:EEG,
  author =       "Long Yuan and Lu Qin and Xuemin Lin and Lijun Chang
                 and Wenjie Zhang",
  title =        "{I/O} efficient {ECC} graph decomposition via graph
                 reduction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "516--527",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of computing $k$-edge connected components
                 ($k$-ECCs) of a graph G for a specific $k$ is a
                 fundamental graph problem and has been investigated
                 recently. In this paper, we study the problem of ECC
                 decomposition, which computes the $k$-ECCs of a graph G
                 for all $k$ values. ECC decomposition can be widely
                 applied in a variety of applications such as
                 graph-topology analysis, community detection, Steiner
                 component search, and graph visualization. A
                 straightforward solution for ECC decomposition is to
                 apply the existing $k$-ECC computation algorithm to
                 compute the $k$-ECCs for all $k$ values. However, this
                 solution is not applicable to large graphs for two
                 challenging reasons. First, all existing $k$-ECC
                 computation algorithms are highly memory intensive due
                 to the complex data structures used in the algorithms.
                 Second, the number of possible $k$ values can be very
                 large, resulting in a high computational cost when each
                 $k$ value is independently considered. In this paper,
                 we address the above challenges, and study I/O
                 efficient ECC decomposition via graph reduction. We
                 introduce two elegant graph reduction operators which
                 aim to reduce the size of the graph loaded in memory
                 while preserving the connectivity information of a
                  certain set of edges to be computed for a specific
                  $k$.
                 We also propose three novel I/O efficient algorithms,
                 Bottom-Up, Top-Down, and Hybrid, that explore the $k$
                 values in different orders to reduce the redundant
                 computations between different $k$ values. We analyze
                 the I/O and memory costs for all proposed algorithms.
                 In our experiments, we evaluate our algorithms using
                 seven real large datasets with various graph
                 properties, one of which contains 1.95 billion edges.
                 The experimental results show that our proposed
                 algorithms are scalable and efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Binnig:2016:ESN,
  author =       "Carsten Binnig and Andrew Crotty and Alex Galakatos
                 and Tim Kraska and Erfan Zamanian",
  title =        "The end of slow networks: it's time for a redesign",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "528--539",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The next generation of high-performance networks with
                 remote direct memory access (RDMA) capabilities
                 requires a fundamental rethinking of the design of
                 distributed in-memory DBMSs. These systems are commonly
                 built under the assumption that the network is the
                 primary bottleneck and should be avoided at all costs,
                 but this assumption no longer holds. For instance, with
                 InfiniBand FDR $ 4 \times $, the bandwidth available to
                 transfer data across the network is in the same
                 ballpark as the bandwidth of one memory channel.
                 Moreover, RDMA transfer latencies continue to rapidly
                 improve as well. In this paper, we first argue that
                 traditional distributed DBMS architectures cannot take
                 full advantage of high-performance networks and suggest
                 a new architecture to address this problem. Then, we
                 discuss initial results from a prototype implementation
                 of our proposed architecture for OLTP and OLAP, showing
                 remarkable performance improvements over existing
                 designs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2016:LLE,
  author =       "Jiewen Huang and Daniel J. Abadi",
  title =        "{Leopard}: lightweight edge-oriented partitioning and
                 replication for dynamic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "540--551",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper introduces a dynamic graph partitioning
                 algorithm, designed for large, constantly changing
                 graphs. We propose a partitioning framework that
                 adjusts on the fly as the graph structure changes. We
                 also introduce a replication algorithm that is tightly
                 integrated with the partitioning algorithm, which
                 further reduces the number of edges cut by the
                 partitioning algorithm. Even though the proposed
                 approach is handicapped by only taking into
                 consideration local parts of the graph when reassigning
                 vertices, extensive evaluation shows that the proposed
                 approach maintains a quality partitioning over time,
                 which is comparable at any point in time to performing
                  a full partitioning from scratch using a
                  state-of-the-art
                 static graph partitioning algorithm such as METIS.
                 Furthermore, when vertex replication is turned on,
                 edge-cut can improve by an order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gribkoff:2016:SDP,
  author =       "Eric Gribkoff and Dan Suciu",
  title =        "{SlimShot}: in-database probabilistic inference for
                 knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "552--563",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Increasingly large Knowledge Bases are being created,
                 by crawling the Web or other corpora of documents, and
                 by extracting facts and relations using machine
                 learning techniques. To manage the uncertainty in the
                 data, these KBs rely on probabilistic engines based on
                 Markov Logic Networks (MLN), for which probabilistic
                 inference remains a major challenge. Today's state of
                 the art systems use variants of MCMC, which have no
                 theoretical error guarantees, and, as we show, suffer
                 from poor performance in practice. In this paper we
                 describe SlimShot (Scalable Lifted Inference and Monte
                 Carlo Sampling Hybrid Optimization Technique), a
                 probabilistic inference engine for knowledge bases.
                 SlimShot converts the MLN to a tuple-independent
                 probabilistic database, then uses a simple Monte
                 Carlo-based inference, with three key enhancements: (1)
                 it combines sampling with safe query evaluation, (2) it
                 estimates a conditional probability by jointly
                 computing the numerator and denominator, and (3) it
                 adjusts the proposal distribution based on the sample
                 cardinality. In combination, these three techniques
                 allow us to give formal error guarantees, and we
                 demonstrate empirically that SlimShot outperforms
                  today's state of the art probabilistic inference
                 engines used in knowledge bases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Yan:2016:GPQ,
  author =       "Da Yan and James Cheng and M. Tamer {\"O}zsu and Fan
                 Yang and Yi Lu and John C. S. Lui and Qizhen Zhang and
                 Wilfred Ng",
  title =        "A general-purpose query-centric framework for querying
                 big graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "564--575",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Pioneered by Google's Pregel, many distributed systems
                 have been developed for large-scale graph analytics.
                 These systems employ a user-friendly ``think like a
                 vertex'' programming model, and exhibit good
                 scalability for tasks where the majority of graph
                 vertices participate in computation. However, the
                 design of these systems can seriously under-utilize the
                 resources in a cluster for processing light-workload
                 graph queries, where only a small fraction of vertices
                 need to be accessed. In this work, we develop a new
                 open-source system, called Quegel, for querying big
                 graphs. Quegel treats queries as first-class citizens
                 in its design: users only need to specify the
                 Pregel-like algorithm for a generic query, and Quegel
                 processes light-workload graph queries on demand, using
                 a novel superstep-sharing execution model to
                 effectively utilize the cluster resources. Quegel
                 further provides a convenient interface for
                 constructing graph indexes, which significantly improve
                 query performance but are not supported by existing
                 graph-parallel systems. Our experiments verified that
                 Quegel is highly efficient in answering various types
                 of graph queries and is up to orders of magnitude
                 faster than existing systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Brucato:2016:SPQ,
  author =       "Matteo Brucato and Juan Felipe Beltran and Azza
                 Abouzied and Alexandra Meliou",
  title =        "Scalable package queries in relational database
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "576--587",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional database queries follow a simple model:
                 they define constraints that each tuple in the result
                 must satisfy. This model is computationally efficient,
                 as the database system can evaluate the query
                 conditions on each tuple individually. However, many
                 practical, real-world problems require a collection of
                 result tuples to satisfy constraints collectively,
                 rather than individually. In this paper, we present
                 package queries, a new query model that extends
                 traditional database queries to handle complex
                 constraints and preferences over answer sets. We
                 develop a full-fledged package query system,
                 implemented on top of a traditional database engine.
                 Our work makes several contributions. First, we design
                 PaQL, a SQL-based query language that supports the
                 declarative specification of package queries. We prove
                 that PaQL is at least as expressive as integer linear
                 programming, and therefore, evaluation of package
                 queries is in general NP-hard. Second, we present a
                 fundamental evaluation strategy that combines the
                 capabilities of databases and constraint optimization
                 solvers to derive solutions to package queries. The
                 core of our approach is a set of translation rules that
                 transform a package query to an integer linear program.
                 Third, we introduce an offline data partitioning
                 strategy allowing query evaluation to scale to large
                 data sizes. Fourth, we introduce SketchRefine, a
                 scalable algorithm for package evaluation, with strong
                 approximation guarantees ($ (1 \pm \epsilon)^6$-factor
                 approximation). Finally, we present extensive
                 experiments over real-world and benchmark data. The
                 results demonstrate that SketchRefine is effective at
                 deriving high-quality package results, and achieves
                 runtime performance that is an order of magnitude
                 faster than directly using ILP solvers over large
                 datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
%%% The abstract's opening "As the prevalence of ..." matches the published
%%% text; do not "fix" it here.
@Article{Wang:2016:STK,
  author =       "Xiang Wang and Ying Zhang and Wenjie Zhang and Xuemin
                 Lin and Zengfeng Huang",
  title =        "{Skype}: top-$k$ spatial-keyword publish\slash
                 subscribe over sliding window",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "588--599",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As the prevalence of social media and GPS-enabled
                 devices, a massive amount of geo-textual data has been
                 generated in a stream fashion, leading to a variety of
                 applications such as location-based recommendation and
                 information dissemination. In this paper, we
                 investigate a novel real-time top-$k$ monitoring
                 problem over sliding window of streaming data; that is,
                 we continuously maintain the top-$k$ most relevant
                 geo-textual messages (e.g., geo-tagged tweets) for a
                 large number of spatial-keyword subscriptions (e.g.,
                 registered users interested in local events)
                 simultaneously. To provide the most recent information
                 under controllable memory cost, sliding window model is
                 employed on the streaming geo-textual data. To the best
                 of our knowledge, this is the first work to study
                 top-$k$ spatial-keyword publish/subscribe over sliding
                 window. A novel system, called Skype (Top-k
                 Spatial-keyword Publish/Subscribe), is proposed in this
                 paper. In Skype, to continuously maintain top-$k$
                 results for massive subscriptions, we devise a novel
                 indexing structure upon subscriptions such that each
                 incoming message can be immediately delivered on its
                 arrival. Moreover, to reduce the expensive top-$k$
                 re-evaluation cost triggered by message expiration, we
                 develop a novel cost-based $k$-skyband technique to
                 reduce the number of re-evaluations in a cost-effective
                 way. Extensive experiments verify the great efficiency
                 and effectiveness of our proposed techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Asudeh:2016:DSW,
  author =       "Abolfazl Asudeh and Saravanan Thirumuruganathan and
                 Nan Zhang and Gautam Das",
  title =        "Discovering the skyline of web databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "7",
  pages =        "600--611",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 19 10:10:00 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many web databases are ``hidden'' behind proprietary
                 search interfaces that enforce the top-$k$ output
                 constraint, i.e., each query returns at most $k$ of all
                 matching tuples, preferentially selected and returned
                 according to a proprietary ranking function. In this
                 paper, we initiate research into the novel problem of
                 skyline discovery over top-$k$ hidden web databases.
                 Since skyline tuples provide critical insights into the
                 database and include the top-ranked tuple for every
                 possible ranking function following the monotonic order
                 of attribute values, skyline discovery from a hidden
                 web database can enable a wide variety of innovative
                 third-party applications over one or multiple web
                 databases. Our research in the paper shows that the
                 critical factor affecting the cost of skyline discovery
                 is the type of search interface controls provided by
                 the website. As such, we develop efficient algorithms
                 for three most popular types, i.e., one-ended range,
                 free range and point predicates, and then combine them
                 to support web databases that feature a mixture of
                 these types. Rigorous theoretical analysis and
                 extensive real-world online and offline experiments
                 demonstrate the effectiveness of our proposed
                 techniques and their superiority over baseline
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Entry complete, including a registered DOI (10.14778/...), unlike the
%%% adjacent volume-9 entries above, which lack one.
@Article{Zhang:2016:CTK,
  author =       "Xiaohang Zhang and Guoliang Li and Jianhua Feng",
  title =        "Crowdsourced top-$k$ algorithms: an experimental
                 evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "8",
  pages =        "612--623",
  month =        apr,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2921558.2921559",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:07:35 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Crowdsourced top-$k$ computation has attracted
                 significant attention recently, thanks to emerging
                 crowdsourcing platforms, e.g., Amazon Mechanical Turk
                 and CrowdFlower. Crowdsourced top-$k$ algorithms ask
                 the crowd to compare the objects and infer the top-$k$
                 objects based on the crowdsourced comparison results.
                 The crowd may return incorrect answers, but traditional
                 top-$k$ algorithms cannot tolerate the errors from the
                 crowd. To address this problem, the database and
                 machine-learning communities have independently studied
                 the crowdsourced top-$k$ problem. The database
                 community proposes the heuristic-based solutions while
                 the machine-learning community proposes the
                 learning-based methods (e.g., maximum likelihood
                 estimation). However, these two types of techniques
                 have not been compared systematically under the same
                 experimental framework. Thus it is rather difficult for
                 a practitioner to decide which algorithm should be
                 adopted. Furthermore, the experimental evaluation of
                 existing studies has several weaknesses. Some methods
                 assume the crowd returns high-quality results and some
                 algorithms are only tested on simulated experiments. To
                 alleviate these limitations, in this paper we present a
                 comprehensive comparison of crowdsourced top-$k$
                 algorithms. Using various synthetic and real datasets,
                 we evaluate each algorithm in terms of result quality
                 and efficiency on real crowdsourcing platforms. We
                 reveal the characteristics of different techniques and
                 provide guidelines on selecting appropriate algorithms
                 for various scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Fixed extraction artifact in abstract: "short-comings" (retained
%%% line-break hyphen from the PDF) -> "shortcomings".
%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Maddox:2016:DRD,
  author =       "Michael Maddox and David Goehring and Aaron J. Elmore
                 and Samuel Madden and Aditya Parameswaran and Amol
                 Deshpande",
  title =        "{Decibel}: the relational dataset branching system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "9",
  pages =        "624--635",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:06:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As scientific endeavors and data analysis become
                 increasingly collaborative, there is a need for data
                 management systems that natively support the versioning
                 or branching of datasets to enable concurrent analysis,
                 cleaning, integration, manipulation, or curation of
                 data across teams of individuals. Common practice for
                 sharing and collaborating on datasets involves creating
                 or storing multiple copies of the dataset, one for each
                 stage of analysis, with no provenance information
                 tracking the relationships between these datasets. This
                 results not only in wasted storage, but also makes it
                 challenging to track and integrate modifications made
                 by different users to the same dataset. In this paper,
                 we introduce the Relational Dataset Branching System,
                 Decibel, a new relational storage system with built-in
                 version control designed to address these
                 shortcomings. We present our initial design for
                 Decibel and provide a thorough evaluation of three
                 versioned storage engine designs that focus on
                 efficient query processing with minimal storage
                 overhead. We also develop an exhaustive benchmark to
                 enable the rigorous testing of these and future
                 versioned storage engine designs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Fixed transcription error in abstract: "adopting this techniques is
%%% still" -> "adopting this technique, is still" (matches the published
%%% abstract's appositive clause around "AllPairs").
%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Mann:2016:EES,
  author =       "Willi Mann and Nikolaus Augsten and Panagiotis
                 Bouros",
  title =        "An empirical evaluation of set similarity join
                 techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "9",
  pages =        "636--647",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:06:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Set similarity joins compute all pairs of similar sets
                 from two collections of sets. We conduct extensive
                 experiments on seven state-of-the-art algorithms for
                 set similarity joins. These algorithms adopt a
                 filter-verification approach. Our analysis shows that
                 verification has not received enough attention in
                 previous works. In practice, efficient verification
                 inspects only a small, constant number of set elements
                 and is faster than some of the more sophisticated
                 filter techniques. Although we can identify three
                 winners, we find that most algorithms show very similar
                 performance. The key technique is the prefix filter,
                 and AllPairs, the first algorithm adopting this
                 technique, is still a relevant competitor. We repeat
                 experiments from previous work and discuss diverging
                 results. All our claims are supported by a detailed
                 analysis of the factors that determine the overall
                 runtime.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Trummer:2016:MQO,
  author =       "Immanuel Trummer and Christoph Koch",
  title =        "Multiple query optimization on the {D-Wave 2X}
                 adiabatic quantum computer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "9",
  pages =        "648--659",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:06:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The D-Wave adiabatic quantum annealer solves hard
                 combinatorial optimization problems leveraging quantum
                 physics. The newest version features over 1000 qubits
                 and was released in August 2015. We were given access
                 to such a machine, currently hosted at NASA Ames
                 Research Center in California, to explore the potential
                 for hard optimization problems that arise in the
                 context of databases. In this paper, we tackle the
                 problem of multiple query optimization (MQO). We show
                 how an MQO problem instance can be transformed into a
                 mathematical formula that complies with the restrictive
                 input format accepted by the quantum annealer. This
                 formula is translated into weights on and between
                 qubits such that the configuration minimizing the input
                 formula can be found via a process called adiabatic
                 quantum annealing. We analyze the asymptotic growth
                 rate of the number of required qubits in the MQO
                 problem dimensions as the number of qubits is currently
                 the main factor restricting applicability. We
                 experimentally compare the performance of the quantum
                 annealer against other MQO algorithms executed on a
                 traditional computer. While the problem sizes that can
                 be treated are currently limited, we already find a
                 class of problem instances where the quantum annealer
                 is three orders of magnitude faster than other
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Trummer:2016:PQO,
  author =       "Immanuel Trummer and Christoph Koch",
  title =        "Parallelizing query optimization on shared-nothing
                 architectures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "9",
  pages =        "660--671",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:06:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data processing systems offer an ever increasing
                 degree of parallelism on the levels of cores, CPUs, and
                 processing nodes. Query optimization must exploit high
                 degrees of parallelism in order not to gradually become
                 the bottleneck of query evaluation. We show how to
                 parallelize query optimization at a massive scale. We
                 present algorithms for parallel query optimization in
                 left-deep and bushy plan spaces. At optimization start,
                 we divide the plan space for a given query into
                 partitions of equal size that are explored in parallel
                 by worker nodes. At the end of optimization, each
                 worker returns the optimal plan in its partition to the
                 master which determines the globally optimal plan from
                 the partition-optimal plans. No synchronization or data
                 exchange is required during the actual optimization
                 phase. The amount of data sent over the network, at the
                 start and at the end of optimization, as well as the
                 complexity of serial steps within our algorithms
                 increase only linearly in the number of workers and in
                 the query size. The time and space complexity of
                 optimization within one partition decreases uniformly
                 in the number of workers. We parallelize single- and
                 multi-objective query optimization over a cluster with
                 100 nodes in our experiments, using more than 250
                 concurrent worker threads (Spark executors). Despite
                 high network latency and task assignment overheads,
                 parallelization yields speedups of up to one order of
                 magnitude for large queries whose optimization takes
                 minutes on a single node.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Kalavri:2016:SPA,
  author =       "Vasiliki Kalavri and Tiago Simas and Dionysios
                 Logothetis",
  title =        "The shortest path is not always a straight line:
                 leveraging semi-metricity in graph analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "9",
  pages =        "672--683",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:06:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we leverage the concept of the metric
                 backbone to improve the efficiency of large-scale graph
                 analytics. The metric backbone is the minimum subgraph
                 that preserves the shortest paths of a weighted graph.
                 We use the metric backbone in place of the original
                 graph to compute various graph metrics exactly or with
                 good approximation. By computing on a smaller graph, we
                 improve the performance of graph analytics applications
                 on two different systems, a batch graph processing
                 system and a graph database. Further, we provide an
                 algorithm for the computation of the metric backbone on
                 large graphs. While one can compute the metric backbone
                 by solving the all-pairs-shortest-paths problem, this
                 approach incurs prohibitive time and space complexity
                 for big graphs. Instead, we propose a heuristic that
                 makes computing the metric backbone practical even for
                 large graphs. Additionally, we analyze several real
                 datasets of different sizes and domains and we show
                 that we can approximate the metric backbone by removing
                 only first-order semi-metric edges; edges for which a
                 shorter two-hop path exists. We provide a distributed
                 implementation of our algorithm and apply it in large
                 scale scenarios. We evaluate our algorithm using a
                 variety of real graphs, including a Facebook social
                 network subgraph of $ \approx $50 billion edges. We
                 measure the impact of using the metric backbone on
                 runtime performance in two graph management systems. We
                 achieve query speedups of up to 6.7x in the Neo4j
                 commercial graph database and job speedups of up to 6x
                 in the Giraph graph processing system.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Papadakis:2016:CAA,
  author =       "George Papadakis and Jonathan Svirsky and Avigdor Gal
                 and Themis Palpanas",
  title =        "Comparative analysis of approximate blocking
                 techniques for entity resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "9",
  pages =        "684--695",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:06:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity Resolution is a core task for merging data
                 collections. Due to its quadratic complexity, it
                 typically scales to large volumes of data through
                 blocking: similar entities are clustered into blocks
                 and pair-wise comparisons are executed only between
                 co-occurring entities, at the cost of some missed
                 matches. There are numerous blocking methods, and the
                 aim of this work is to offer a comprehensive empirical
                 survey, extending the dimensions of comparison beyond
                 what is commonly available in the literature. We
                 consider 17 state-of-the-art blocking methods and use 6
                 popular real datasets to examine the robustness of
                 their internal configurations and their relative
                 balance between effectiveness and time efficiency. We
                 also investigate their scalability over a corpus of 7
                 established synthetic datasets that range from 10,000
                 to 2 million entities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Fixed LaTeX defect in abstract: a literal "<" in text mode misrenders
%%% under the OT1 font encoding (prints as an inverted exclamation mark);
%%% wrapped it in math mode: "(< 1 Watt)" -> "($<$ 1 Watt)".
%%% NOTE(review): no DOI field -- sibling entries in this volume (e.g.,
%%% Zhang:2016:CTK) carry 10.14778/... DOIs; look up and add if registered.
@Article{Zhao:2016:EED,
  author =       "Yiran Zhao and Shen Li and Shaohan Hu and Hongwei Wang
                 and Shuochao Yao and Huajie Shao and Tarek Abdelzaher",
  title =        "An experimental evaluation of datacenter workloads on
                 low-power embedded micro servers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "9",
  pages =        "696--707",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu May 26 16:06:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents a comprehensive evaluation of an
                 ultra-low power cluster, built upon the Intel Edison
                 based micro servers. The improved performance and high
                 energy efficiency of micro servers have driven both
                 academia and industry to explore the possibility of
                 replacing conventional brawny servers with a larger
                 swarm of embedded micro servers. Existing attempts
                 mostly focus on mobile-class micro servers, whose
                 capacities are similar to mobile phones. We, on the
                 other hand, target on sensor-class micro servers, which
                 are originally intended for uses in wearable
                 technologies, sensor networks, and Internet-of-Things.
                 Although sensor-class micro servers have much less
                 capacity, they are touted for minimal power consumption
                 ($<$ 1 Watt), which opens new possibilities of achieving
                 higher energy efficiency in datacenter workloads. Our
                 systematic evaluation of the Edison cluster and
                 comparisons to conventional brawny clusters involve
                 careful workload choosing and laborious parameter
                 tuning, which ensures maximum server utilization and
                 thus fair comparisons. Results show that the Edison
                 cluster achieves up to 3.5x improvement on
                 work-done-per-joule for web service applications and
                 data-intensive MapReduce jobs. In terms of scalability,
                 the Edison cluster scales linearly on the throughput of
                 web service workloads, and also shows satisfactory
                 scalability for MapReduce workloads despite
                 coordination overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Song:2016:CTT,
  author =       "Shaoxu Song and Yue Cao and Jianmin Wang",
  title =        "Cleaning timestamps with temporal constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "708--719",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977798",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Timestamps are often found to be dirty in various
                 scenarios, e.g., in distributed systems with clock
                 synchronization problems or unreliable RFID readers.
                 Without cleaning the imprecise timestamps,
                 temporal-related applications such as provenance
                 analysis or pattern queries are not reliable. To
                 evaluate the correctness of timestamps, temporal
                 constraints could be employed, which declare the
                 distance restrictions between timestamps. Guided by
                 such constraints on timestamps, in this paper, we study
                 a novel problem of repairing inconsistent timestamps
                 that do not conform to the required temporal
                 constraints. Following the same line of data repairing,
                 the timestamp repairing problem is to minimally modify
                 the timestamps towards satisfaction of temporal
                 constraints. This problem is practically challenging,
                 given the huge space of possible timestamps. We tackle
                 the problem by identifying a concise set of promising
                 candidates, where an optimal repair solution can always
                 be found. Repair algorithms with efficient pruning are
                 then devised over the identified candidates.
                 Experiments on real datasets demonstrate the
                 superiority of our proposal compared to the
                 state-of-the-art approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tan:2016:TRS,
  author =       "Zilong Tan and Shivnath Babu",
  title =        "{Tempo}: robust and self-tuning resource management in
                 multi-tenant parallel databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "720--731",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977799",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multi-tenant database systems have a component called
                 the Resource Manager, or RM that is responsible for
                 allocating resources to tenants. RMs today do not
                 provide direct support for performance objectives such
                 as: ``Average job response time of tenant A must be
                 less than two minutes'', or ``No more than 5\% of
                 tenant B's jobs can miss the deadline of 1 hour.''
                 Thus, DBAs have to tinker with the RM's low-level
                 configuration settings to meet such objectives. We
                 propose a framework called Tempo that brings
                 simplicity, self-tuning, and robustness to existing
                 RMs. Tempo provides a simple interface for DBAs to
                 specify performance objectives declaratively, and
                 optimizes the RM configuration settings to meet these
                 objectives. Tempo has a solid theoretical foundation
                 which gives key robustness guarantees. We report
                 experiments done on Tempo using production traces of
                 data-processing workloads from companies such as
                 Facebook and Cloudera. These experiments demonstrate
                 significant improvements in meeting desired performance
                 objectives over RM configuration settings specified by
                 human experts.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Daenen:2016:PEM,
  author =       "Jonny Daenen and Frank Neven and Tony Tan and Stijn
                 Vansummeren",
  title =        "Parallel evaluation of multi-semi-joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "732--743",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977800",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While services such as Amazon AWS make computing power
                 abundantly available, adding more computing nodes can
                 incur high costs in, for instance, pay-as-you-go plans
                 while not always significantly improving the net
                 running time (aka wall-clock time) of queries. In this
                 work, we provide algorithms for parallel evaluation of
                 SGF queries in MapReduce that optimize total time,
                 while retaining low net time. Not only can SGF queries
                 specify all semi-join reducers, but also more
                 expressive queries involving disjunction and negation.
                 Since SGF queries can be seen as Boolean combinations
                 of (potentially nested) semi-joins, we introduce a
                 novel multi-semi-join (MSJ) MapReduce operator that
                 enables the evaluation of a set of semi-joins in one
                 job. We use this operator to obtain parallel query
                 plans for SGF queries that outvalue sequential plans
                 w.r.t. net time and provide additional optimizations
                 aimed at minimizing total time without severely
                 affecting net time. Even though the latter
                 optimizations are NP-hard, we present effective greedy
                 algorithms. Our experiments, conducted using our own
                 implementation Gumbo on top of Hadoop, confirm the
                 usefulness of parallel query plans, and the
                 effectiveness and scalability of our optimizations, all
                 with a significant improvement over Pig and Hive.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2016:WCE,
  author =       "Jianfei Chen and Kaiwei Li and Jun Zhu and Wenguang
                 Chen",
  title =        "{WarpLDA}: a cache efficient {O(1)} algorithm for
                 latent {Dirichlet} allocation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "744--755",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977801",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Developing efficient and scalable algorithms for
                 Latent Dirichlet Allocation (LDA) is of wide interest
                 for many applications. Previous work has developed an $
                 O(1) $ Metropolis--Hastings (MH) sampling method for
                 each token. However, its performance is far from being
                 optimal due to frequent cache misses caused by random
                 accesses to the parameter matrices. In this paper, we
                 first carefully analyze the memory access behavior of
                 existing algorithms for LDA by cache locality at
                 document level. We then develop WarpLDA, which achieves
                 $ O(1) $ time complexity per-token and fits the
                 randomly accessed memory per document in the L3 cache.
                 Our empirical results in a wide range of testing
                 conditions demonstrate that WarpLDA is consistently
                 5--15x faster than the state-of-the-art MH-based
                 LightLDA, and is faster than the state-of-the-art
                 sparsity aware F+LDA in most settings. Our WarpLDA
                 learns a million topics from 639 millions of documents
                 in only five hours at an unprecedented throughput of 11
                 billion tokens per second.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eich:2016:FPG,
  author =       "Marius Eich and Pit Fender and Guido Moerkotte",
  title =        "Faster plan generation through consideration of
                 functional dependencies and keys",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "756--767",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977802",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "It has been a recognized fact for many years that
                 query execution can benefit from pushing group-by
                 operators down in the operator tree and applying them
                 before a join. This so-called eager aggregation reduces
                 the size(s) of the join argument(s), making join
                 evaluation faster. Lately, the idea enjoyed a revival
                 when it was applied to outer joins for the first time
                 and incorporated in a state-of-the-art plan generator.
                 However, this recent approach is highly dependent on
                 the use of heuristics because of the exponential growth
                 of the search space that goes along with eager
                 aggregation. Finding an optimal solution for larger
                 queries calls for effective optimality preserving
                 pruning mechanisms to reduce the search space size as
                 far as possible. By a more thorough investigation of
                 functional dependencies and keys, we provide a set of
                 new pruning criteria and evaluate their effectiveness
                 with respect to the runtime and memory consumption of
                 the resulting plan generator.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schuhknecht:2016:RIR,
  author =       "Felix Martin Schuhknecht and Jens Dittrich and Ankur
                 Sharma",
  title =        "{RUMA} has it: rewired user-space memory access is
                 possible!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "768--779",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977803",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Memory management is one of the most boring topics in
                 database research. It plays a minor role in tasks like
                 free-space management or efficient space usage. Here
                 and there we also realize its impact on database
                 performance when worrying about NUMA-aware memory
                 allocation, data compacting, snapshotting, and
                 defragmentation. But, overall, let's face it: the
                 entire topic sounds as exciting as ``garbage
                 collection'' or ``debugging a program for memory
                 leaks''. What if
                 there were a technique that would promote memory
                 management from a third class helper thingie to a first
                 class citizen in algorithm and systems design? What if
                 that technique turned the role of memory management in
                 a database system (and any other data processing
                 system) upside-down? What if that technique could be
                 identified as a key for re-designing various core
                 algorithms with the effect of outperforming existing
                 state-of-the-art methods considerably? Then we would
                 write this paper. We introduce RUMA: Rewired User-space
                 Memory Access. It allows for physiological data
                 management, i.e. we allow developers to freely rewire
                 the mappings from virtual to physical memory (in user
                 space) while at the same time exploiting the virtual
                 memory support offered by hardware and operating
                 system. We show that fundamental database building
                 blocks such as array operations, partitioning, sorting,
                 and snapshotting benefit strongly from RUMA.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Marcus:2016:WLB,
  author =       "Ryan Marcus and Olga Papaemmanouil",
  title =        "{WiSeDB}: a learning-based workload management advisor
                 for cloud databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "780--791",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977804",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Workload management for cloud databases deals with the
                 tasks of resource provisioning, query placement, and
                 query scheduling in a manner that meets the
                 application's performance goals while minimizing the
                 cost of using cloud resources. Existing solutions have
                 approached these three challenges in isolation while
                 aiming to optimize a single performance metric. In this
                 paper, we introduce WiSeDB, a learning-based framework
                 for generating holistic workload management solutions
                 customized to application-defined performance goals and
                 workload characteristics. Our approach relies on
                 supervised learning to train cost-effective decision
                 tree models for guiding query placement, scheduling,
                 and resource provisioning decisions. Applications can
                 use these models for both batch and online scheduling
                 of incoming workloads. A unique feature of our system
                 is that it can adapt its offline model to
                 stricter/looser performance goals with minimal
                 re-training. This allows us to present to the
                 application alternative workload management strategies
                 that address the typical performance vs. cost trade-off
                 of cloud services. Experimental results show that our
                 approach has very low training overhead while offering
                 low cost strategies for a variety of performance
                 metrics and workload characteristics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{DeFrancisciMorales:2016:SSS,
  author =       "Gianmarco {De Francisci Morales} and Aristides
                 Gionis",
  title =        "Streaming similarity self-join",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "792--803",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977805",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce and study the problem of computing the
                 similarity self-join in a streaming context (SSSJ),
                 where the input is an unbounded stream of items
                 arriving continuously. The goal is to find all pairs of
                 items in the stream whose similarity is greater than a
                 given threshold. The simplest formulation of the
                 problem requires unbounded memory, and thus, it is
                 intractable. To make the problem feasible, we introduce
                 the notion of time-dependent similarity: the similarity
                 of two items decreases with the difference in their
                 arrival time. By leveraging the properties of this
                 time-dependent similarity function, we design two
                 algorithmic frameworks to solve the SSSJ problem. The
                 first one, MiniBatch (MB), uses existing index-based
                 filtering techniques for the static version of the
                 problem, and combines them in a pipeline. The second
                 framework, Streaming (STR), adds time filtering to the
                 existing indexes, and integrates new time-based bounds
                 deeply in the working of the algorithms. We also
                 introduce a new indexing technique (L2), which is based
                 on an existing state-of-the-art indexing technique
                 (L2AP), but is optimized for the streaming case.
                 Extensive experiments show that the STR algorithm, when
                 instantiated with the L2 index, is the most scalable
                 option across a wide array of datasets and
                 parameters.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schatzle:2016:SRQ,
  author =       "Alexander Sch{\"a}tzle and Martin Przyjaciel-Zablocki
                 and Simon Skilevic and Georg Lausen",
  title =        "{S2RDF}: {RDF} querying with {SPARQL} on {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "804--815",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977806",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "RDF has become very popular for semantic data
                 publishing due to its flexible and universal graph-like
                 data model. Thus, the ever-increasing size of RDF data
                 collections raises the need for scalable distributed
                 approaches. We endorse the usage of existing
                 infrastructures for Big Data processing like Hadoop for
                 this purpose. Yet, SPARQL query performance is a major
                 challenge as Hadoop is not intentionally designed for
                 RDF processing. Existing approaches often favor certain
                 query pattern shapes while performance drops
                 significantly for other shapes. In this paper, we
                 introduce a novel relational partitioning schema for
                 RDF data called ExtVP that uses a semi-join based
                 preprocessing, akin to the concept of Join Indices in
                 relational databases, to efficiently minimize query
                 input size regardless of its pattern shape and
                 diameter. Our prototype system S2RDF is built on top of
                 Spark and uses SQL to execute SPARQL queries over
                 ExtVP. We demonstrate its superior performance in
                 comparison to state of the art SPARQL-on-Hadoop
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Singh:2016:BSS,
  author =       "Rishabh Singh",
  title =        "{BlinkFill}: semi-supervised programming by example
                 for syntactic string transformations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "816--827",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977807",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The recent Programming By Example (PBE) techniques
                 such as FlashFill have shown great promise for enabling
                 end-users to perform data transformation tasks using
                 input-output examples. Since examples are inherently an
                 under-specification, there are typically a large number
                 of hypotheses conforming to the examples, and the PBE
                 techniques suffer from scalability issues for finding
                 the intended program amongst the large space. We
                 present a semi-supervised learning technique to
                 significantly reduce this ambiguity by using the
                 logical information present in the input data to guide
                 the synthesis algorithm. We develop a data structure
                 InputDataGraph to succinctly represent a large set of
                 logical patterns that are shared across the input data,
                 and use this graph to efficiently learn substring
                 expressions in a new PBE system BlinkFill. We evaluate
                 BlinkFill on 207 real-world benchmarks and show that
                 BlinkFill is significantly faster (on average 41x) and
                 requires fewer input-output examples (1.27 vs 1.53) to
                 learn the desired transformations in comparison to
                 FlashFill.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deng:2016:MEM,
  author =       "Dong Deng and Guoliang Li and He Wen and H. V.
                 Jagadish and Jianhua Feng",
  title =        "{META}: an efficient matching-based method for
                 error-tolerant autocompletion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "10",
  pages =        "828--839",
  month =        jun,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2977797.2977808",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Autocompletion has been widely adopted in many
                 computing systems because it can instantly provide
                 users with results as users type in queries. Since the
                 typing task is tedious and prone to error, especially
                 on mobile devices, a recent trend is to tolerate errors
                 in autocompletion. Existing error-tolerant
                 autocompletion methods build a trie to index the data,
                 utilize the trie index to compute the trie nodes that
                 are similar to the query, called active nodes, and
                 identify the leaf descendants of active nodes as the
                 results. However these methods have two limitations.
                 First, they involve many redundant computations to
                 identify the active nodes. Second, they do not support
                 top-$k$ queries. To address these problems, we propose a
                 matching-based framework, which computes the answers
                 based on matching characters between queries and data.
                 We design a compact tree index to maintain active nodes
                 in order to avoid the redundant computations. We devise
                 an incremental method to efficiently answer top-$k$
                 queries. Experimental results on real datasets show
                 that our method outperforms state-of-the-art approaches
                 by 1--2 orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zheng:2016:SSS,
  author =       "Weiguo Zheng and Lei Zou and Wei Peng and Xifeng Yan
                 and Shaoxu Song and Dongyan Zhao",
  title =        "Semantic {SPARQL} similarity search over {RDF}
                 knowledge graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "11",
  pages =        "840--851",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2983200.2983201",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "RDF knowledge graphs have attracted increasing
                 attentions these years. However, due to the schema-free
                 nature of RDF data, it is very difficult for users to
                 have full knowledge of the underlying schema.
                 Furthermore, the same kind of information can be
                 represented in diverse graph fragments. Hence, it is a
                 huge challenge to formulate complex SPARQL expressions
                 by taking the union of all possible structures. In this
                 paper, we propose an effective framework to access the
                 RDF repository even if users have no full knowledge of
                 the underlying schema. Specifically, given a SPARQL
                 query, the system could return as many answers that
                 match the query based on the semantic similarity as
                 possible. Interestingly, we propose a systematic method
                 to mine diverse semantically equivalent structure
                 patterns. More importantly, incorporating both
                 structural and semantic similarities we are the first
                 to propose a novel similarity measure, semantic graph
                 edit distance. In order to improve the efficiency
                 performance, we apply the semantic summary graph to
                 summarize the knowledge graph, which supports both
                 high-level pruning and drill-down pruning. We also
                 devise an effective lower bound based on the TA-style
                 access to each of the candidate sets. Extensive
                 experiments over real datasets confirm the
                 effectiveness and efficiency of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dubey:2016:WHP,
  author =       "Ayush Dubey and Greg D. Hill and Robert Escriva and
                 Emin G{\"u}n Sirer",
  title =        "{Weaver}: a high-performance, transactional graph
                 database based on refinable timestamps",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "11",
  pages =        "852--863",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2983200.2983202",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph databases have become a common infrastructure
                 component. Yet existing systems either operate on
                 offline snapshots, provide weak consistency guarantees,
                 or use expensive concurrency control techniques that
                 limit performance. In this paper, we introduce a new
                 distributed graph database, called Weaver, which
                 enables efficient, transactional graph analyses as well
                 as strictly serializable ACID transactions on dynamic
                 graphs. The key insight that allows Weaver to combine
                 strict serializability with horizontal scalability and
                 high performance is a novel request ordering mechanism
                 called refinable timestamps. This technique couples
                 coarse-grained vector timestamps with a fine-grained
                 timeline oracle to pay the overhead of strong
                 consistency only when needed. Experiments show that
                 Weaver enables a Bitcoin blockchain explorer that is 8x
                 faster than Blockchain.info, and achieves 10.9x higher
                 throughput than the Titan graph database on social
                 network workloads and 4x lower latency than GraphLab on
                 offline graph traversal workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chu:2016:DDD,
  author =       "Xu Chu and Ihab F. Ilyas and Paraschos Koutris",
  title =        "Distributed data deduplication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "11",
  pages =        "864--875",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2983200.2983203",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data deduplication refers to the process of
                 identifying tuples in a relation that refer to the same
                 real world entity. The complexity of the problem is
                 inherently quadratic with respect to the number of
                 tuples, since a similarity value must be computed for
                 every pair of tuples. To avoid comparing tuple pairs
                 that are obviously non-duplicates, blocking techniques
                 are used to divide the tuples into blocks and only
                 tuples within the same block are compared. However,
                 even with the use of blocking, data deduplication
                 remains a costly problem for large datasets. In this
                 paper, we show how to further speed up data
                 deduplication by leveraging parallelism in a
                 shared-nothing computing environment. Our main
                 contribution is a distribution strategy, called
                 Dis-Dedup, that minimizes the maximum workload across
                 all worker nodes and provides strong theoretical
                 guarantees. We demonstrate the effectiveness of our
                 proposed strategy by performing extensive experiments
                 on both synthetic datasets with varying block size
                 distributions, as well as real world datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arenas:2016:FAC,
  author =       "Marcelo Arenas and Francisco Maturana and Cristian
                 Riveros and Domagoj Vrgoc",
  title =        "A framework for annotating {CSV}-like data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "11",
  pages =        "876--887",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2983200.2983204",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we propose a simple and expressive
                 framework for adding metadata to CSV documents and
                 their noisy variants. The framework is based on
                 annotating parts of the document that can be later used
                 to read, query, or exchange the data. The core of our
                 framework is a language based on extended regular
                 expressions that are used for selecting data. These
                 expressions are then combined using a set of rules in
                 order to annotate the data. We study the computational
                 complexity of implementing our framework and present an
                 efficient evaluation algorithm that runs in time
                 proportional to its output and linear in its input. As
                 a proof of concept, we test an implementation of our
                 framework against a large number of real world datasets
                 and show that it can be efficiently used in practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Asudeh:2016:QRS,
  author =       "Abolfazl Asudeh and Nan Zhang and Gautam Das",
  title =        "Query reranking as a service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "11",
  pages =        "888--899",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2983200.2983205",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ranked retrieval model has rapidly become the de
                 facto way for search query processing in client-server
                  databases, especially those on the web. Despite the
                 extensive efforts in the database community on
                 designing better ranking functions/mechanisms, many
                 such databases in practice still fail to address the
                 diverse and sometimes contradicting preferences of
                 users on tuple ranking, perhaps (at least partially)
                 due to the lack of expertise and/or motivation for the
                 database owner to design truly effective ranking
                 functions. This paper takes a different route on
                 addressing the issue by defining a novel query
                 reranking problem, i.e., we aim to design a third-party
                 service that uses nothing but the public search
                 interface of a client-server database to enable the
                 on-the-fly processing of queries with any
                 user-specified ranking functions (with or without
                 selection conditions), no matter if the ranking
                 function is supported by the database or not. We
                 analyze the worst-case complexity of the problem and
                 introduce a number of ideas, e.g., on-the-fly indexing,
                 domination detection and virtual tuple pruning, to
                 reduce the average-case cost of the query reranking
                 algorithm. We also present extensive experimental
                 results on real-world datasets, in both offline and
                 live online systems, that demonstrate the effectiveness
                 of our proposed techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ma:2016:GSF,
  author =       "Hongbin Ma and Bin Shao and Yanghua Xiao and Liang
                 Jeff Chen and Haixun Wang",
  title =        "{G-SQL}: fast query processing via graph exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "900--911",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994510",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A lot of real-life data are of graph nature. However,
                 it is not until recently that business begins to
                 exploit data's connectedness for business insights. On
                 the other hand, RDBMSs are a mature technology for data
                 management, but they are not for graph processing. Take
                 graph traversal, a common graph operation for example,
                 it heavily relies on a graph primitive that accesses a
                 given node's neighborhood. We need to join tables
                 following foreign keys to access the nodes in the
                 neighborhood if an RDBMS is used to manage graph data.
                 Graph exploration is a fundamental building block of
                 many graph algorithms. But this simple operation is
                 costly due to a large volume of I/O caused by the
                 massive amount of table joins. In this paper, we
                 present G-SQL, our effort toward the integration of a
                 RDBMS and a native in-memory graph processing engine.
                 G-SQL leverages the fast graph exploration capability
                 provided by the graph engine to answer multi-way join
                 queries. Meanwhile, it uses RDBMSs to provide mature
                 data management functionalities, such as reliable data
                 storage and additional data access methods.
                 Specifically, G-SQL is a SQL dialect augmented with
                 graph exploration functionalities and it dispatches
                 query tasks to the in-memory graph engine and its
                  underlying RDBMS. The G-SQL runtime coordinates the two
                 query processors via a unified cost model to ensure the
                 entire query is processed efficiently. Experimental
                 results show that our approach greatly expands
                  capabilities of RDBMSs and delivers exceptional
                 performance for SQL-graph hybrid queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2016:MOD,
  author =       "Mingxing Zhang and Yongwei Wu and Kang Chen and Teng
                 Ma and Weimin Zheng",
  title =        "Measuring and optimizing distributed array programs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "912--923",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994511",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nowadays, there is a rising trend of building
                 array-based distributed computing frameworks, which are
                 suitable for implementing many machine learning and
                 data mining algorithms. However, most of these
                 frameworks only execute each primitive in an isolated
                 manner and in the exact order defined by programmers,
                 which implies a huge space for optimization. In this
                 paper, we propose a novel array-based programming
                  model, named Kasen, which distinguishes itself from
                 models in the existing literature by defining a strict
                 computation and communication model. This model makes
                 it easy to analyze programs' behavior and measure their
                 performance, with which we design a corresponding
                 optimizer that can automatically apply high-level
                 optimizations to the original programs written by
                 programmers. According to our evaluation, the optimizer
                 of Kasen can achieve a significant reduction on memory
                 read/write, buffer allocation and network traffic,
                 which leads to a speedup up to 5.82x.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jo:2016:YHP,
  author =       "Insoon Jo and Duck-Ho Bae and Andre S. Yoon and
                 Jeong-Uk Kang and Sangyeun Cho and Daniel D. G. Lee and
                 Jaeheon Jeong",
  title =        "{YourSQL}: a high-performance database system
                 leveraging in-storage computing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "924--935",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994512",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents YourSQL, a database system that
                 accelerates data-intensive queries with the help of
                 additional in-storage computing capabilities. YourSQL
                 realizes very early filtering of data by offloading
                 data scanning of a query to user-programmable
                 solid-state drives. We implement our system on a recent
                 branch of MariaDB (a variant of MySQL). In order to
                 quantify the performance gains of YourSQL, we evaluate
                 SQL queries with varying complexities. Our result shows
                 that YourSQL reduces the execution time of the whole
                 TPC-H queries by $ 3.6 \times $, compared to a vanilla
                 system. Moreover, the average speed-up of the five
                 TPC-H queries with the largest performance gains
                 reaches over $ 15 \times $. Thanks to this significant
                 reduction of execution time, we observe sizable energy
                 savings. Our study demonstrates that the YourSQL
                 approach, combining the power of early filtering with
                 end-to-end datapath optimization, can accelerate
                 large-scale analytic queries with lower energy
                 consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2016:LBM,
  author =       "Lu Lu and Xuanhua Shi and Yongluan Zhou and Xiong
                 Zhang and Hai Jin and Cheng Pei and Ligang He and
                 Yuanzhen Geng",
  title =        "Lifetime-based memory management for distributed data
                 processing systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "936--947",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994513",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In-memory caching of intermediate data and eager
                 combining of data in shuffle buffers have been shown to
                 be very effective in minimizing the re-computation and
                 I/O cost in distributed data processing systems like
                 Spark and Flink. However, it has also been widely
                 reported that these techniques would create a large
                 amount of long-living data objects in the heap, which
                 may quickly saturate the garbage collector, especially
                 when handling a large dataset, and hence would limit
                 the scalability of the system. To eliminate this
                 problem, we propose a lifetime-based memory management
                 framework, which, by automatically analyzing the
                 user-defined functions and data types, obtains the
                 expected lifetime of the data objects, and then
                 allocates and releases memory space accordingly to
                 minimize the garbage collection overhead. In
                 particular, we present Deca, a concrete implementation
                 of our proposal on top of Spark, which transparently
                 decomposes and groups objects with similar lifetimes
                 into byte arrays and releases their space altogether
                 when their lifetimes come to an end. An extensive
                 experimental study using both synthetic and real
                 datasets shows that, in comparing to Spark, Deca is
                 able to (1) reduce the garbage collection time by up to
                 99.9\%, (2) to achieve up to 22.7x speed up in terms of
                 execution time in cases without data spilling and 41.6x
                 speedup in cases with data spilling, and (3) to consume
                 up to 46.6\% less memory.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Krishnan:2016:AID,
  author =       "Sanjay Krishnan and Jiannan Wang and Eugene Wu and
                 Michael J. Franklin and Ken Goldberg",
  title =        "{ActiveClean}: interactive data cleaning for
                 statistical modeling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "948--959",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994514",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analysts often clean dirty data iteratively--cleaning
                 some data, executing the analysis, and then cleaning
                 more data based on the results. We explore the
                 iterative cleaning process in the context of
                 statistical model training, which is an increasingly
                 popular form of data analytics. We propose ActiveClean,
                 which allows for progressive and iterative cleaning in
                 statistical modeling problems while preserving
                 convergence guarantees. ActiveClean supports an
                 important class of models called convex loss models
                 (e.g., linear regression and SVMs), and prioritizes
                 cleaning those records likely to affect the results. We
                  evaluate ActiveClean on five real-world datasets (UCI
                  Adult, UCI EEG, MNIST, IMDB, and Dollars For Docs) with
                 both real and synthetic errors. The results show that
                 our proposed optimizations can improve model accuracy
                 by up-to 2.5x for the same amount of data cleaned.
                 Furthermore for a fixed cleaning budget and on all real
                 dirty datasets, ActiveClean returns more accurate
                 models than uniform sampling and Active Learning.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Elgohary:2016:CLA,
  author =       "Ahmed Elgohary and Matthias Boehm and Peter J. Haas
                 and Frederick R. Reiss and Berthold Reinwald",
  title =        "Compressed linear algebra for large-scale machine
                 learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "960--971",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994515",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale machine learning (ML) algorithms are often
                 iterative, using repeated read-only data access and
                 I/O-bound matrix-vector multiplications to converge to
                 an optimal model. It is crucial for performance to fit
                 the data into single-node or distributed main memory.
                 General-purpose, heavy- and lightweight compression
                 techniques struggle to achieve both good compression
                 ratios and fast decompression speed to enable
                 block-wise uncompressed operations. Hence, we initiate
                 work on compressed linear algebra (CLA), in which
                 lightweight database compression techniques are applied
                 to matrices and then linear algebra operations such as
                 matrix-vector multiplication are executed directly on
                 the compressed representations. We contribute effective
                 column compression schemes, cache-conscious operations,
                 and an efficient sampling-based compression algorithm.
                 Our experiments show that CLA achieves in-memory
                 operations performance close to the uncompressed case
                 and good compression ratios that allow us to fit larger
                 datasets into available memory. We thereby obtain
                 significant end-to-end performance improvements up to
                 26x or reduced memory requirements.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Karpathiotakis:2016:FQH,
  author =       "Manos Karpathiotakis and Ioannis Alagiannis and
                 Anastasia Ailamaki",
  title =        "Fast queries over heterogeneous data through engine
                 customization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "972--983",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994516",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Industry and academia are continuously becoming more
                 data-driven and data-intensive, relying on the analysis
                 of a wide variety of heterogeneous datasets to gain
                 insights. The different data models and formats pose a
                 significant challenge on performing analysis over a
                 combination of diverse datasets. Serving all queries
                 using a single, general-purpose query engine is slow.
                 On the other hand, using a specialized engine for each
                 heterogeneous dataset increases complexity: queries
                 touching a combination of datasets require an
                 integration layer over the different engines. This
                 paper presents a system design that natively supports
                 heterogeneous data formats and also minimizes query
                 execution times. For multi-format support, the design
                 uses an expressive query algebra which enables
                 operations over various data models. For minimal
                 execution times, it uses a code generation mechanism to
                 mimic the system and storage most appropriate to answer
                 a query fast. We validate our design by building
                 Proteus, a query engine which natively supports queries
                 over CSV, JSON, and relational binary data, and which
                 specializes itself to each query, dataset, and workload
                 via code generation. Proteus outperforms
                 state-of-the-art open-source and commercial systems on
                 both synthetic and real-world workloads without being
                 tied to a single data model or format, all while
                 exposing users to a single query interface.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bhowmick:2016:DDV,
  author =       "Sourav S. Bhowmick and Byron Choi and Curtis Dyreson",
  title =        "Data-driven visual graph query interface construction
                 and maintenance: challenges and opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "984--992",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994517",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Visual query interfaces make it easy for scientists
                 and other nonexpert users to query a data collection.
                 Heretofore, visual query interfaces have been
                 statically-constructed, independent of the data. In
                 this paper we outline a vision of a different kind of
                 interface, one that is built (in part) from the data.
                 In our data-driven approach, the visual interface is
                 dynamically constructed and maintained. A data-driven
                 approach has many benefits such as reducing the cost in
                 constructing and maintaining an interface, superior
                 support for query formulation, and increased
                 portability of the interface. We focus on graph
                 databases, but our approach is applicable to several
                 other kinds of databases such as JSON and XML.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abedjan:2016:DDE,
  author =       "Ziawasch Abedjan and Xu Chu and Dong Deng and Raul
                 Castro Fernandez and Ihab F. Ilyas and Mourad Ouzzani
                 and Paolo Papotti and Michael Stonebraker and Nan
                 Tang",
  title =        "Detecting data errors: where are we and what needs to
                 be done?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "993--1004",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994518",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data cleaning has played a critical role in ensuring
                 data quality for enterprise applications. Naturally,
                 there has been extensive research in this area, and
                 many data cleaning algorithms have been translated into
                 tools to detect and to possibly repair certain classes
                 of errors such as outliers, duplicates, missing values,
                 and violations of integrity constraints. Since
                 different types of errors may coexist in the same data
                 set, we often need to run more than one kind of tool.
                 In this paper, we investigate two pragmatic questions:
                 (1) are these tools robust enough to capture most
                 errors in real-world data sets? and (2) what is the
                 best strategy to holistically run multiple tools to
                 optimize the detection effort? To answer these two
                 questions, we obtained multiple data cleaning tools
                 that utilize a variety of error detection techniques.
                 We also collected five real-world data sets, for which
                 we could obtain both the raw data and the ground truth
                 on existing errors. In this paper, we report our
                 experimental findings on the errors detected by the
                 tools we tested. First, we show that the coverage of
                 each tool is well below 100\%. Second, we show that the
                 order in which multiple tools are run makes a big
                 difference. Hence, we propose a holistic multi-tool
                 strategy that orders the invocations of the available
                 tools to maximize their benefit, while minimizing human
                 effort in verifying results. Third, since this holistic
                 approach still does not lead to acceptable error
                 coverage, we discuss two simple strategies that have
                 the potential to improve the situation, namely domain
                 specific tools and data enrichment. We close this paper
                 by reasoning about the errors that are not detectable
                 by any of the tools we tested.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2016:ESH,
  author =       "Hai Liu and Dongqing Xiao and Pankaj Didwania and
                 Mohamed Y. Eltabakh",
  title =        "Exploiting soft and hard correlations in big data
                 query optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1005--1016",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994519",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big data infrastructures are increasingly supporting
                 datasets that are relatively structured. These datasets
                 are full of correlations among their attributes, which
                 if managed in systematic ways would enable optimization
                 opportunities that otherwise will be missed. Unlike
                 relational databases in which discovering and
                 exploiting the correlations in query optimization have
                 been extensively studied, in big data infrastructures,
                 such important data properties and their utilization
                 have been mostly abandoned. The key reason is that
                 domain experts may know many correlations but with a
                 degree of uncertainty (fuzziness or softness). Since
                 the data is big, it is very challenging to validate
                 such correlations, judge their worthiness, and put
                 strategies for utilizing them in query optimization.
                 Existing techniques for exploiting soft correlations in
                 RDBMSs, e.g., BHUNT, CORDS, and CM, are heavily
                 tailored towards optimizing factors inherent in
                 relational databases, e.g., predicate selectivity and
                 random I/O accesses of secondary indexes, which are
                 issues not applicable to big data infrastructures,
                 e.g., Hadoop. In this paper, we propose the EXORD
                 system to fill in this gap by exploiting the data's
                 correlations in big data query optimization. EXORD
                 supports two types of correlations; hard
                 correlations---which are guaranteed to hold for all
                 data records, and soft correlations---which are
                 expected to hold for most, but not all, data records.
                 We introduce a new three-phase approach for (1)
                 Validating and judging the worthiness of soft
                 correlations, (2) Selecting and preparing the soft
                 correlations for deployment by specially handling the
                 violating data records, and (3) Deploying and
                 exploiting the correlations in query optimization. We
                 propose a novel cost-benefit model for adaptively
                 selecting the most beneficial soft correlations w.r.t a
                 given query workload while minimizing the introduced
                 overhead. We show the complexity of this problem
                 (NP-Hard), and propose a heuristic to efficiently solve
                  it in polynomial time. EXORD can be integrated with
                 various state-of-art big data query optimization
                 techniques, e.g., indexing and partitioning. EXORD
                 prototype is implemented as an extension to the Hive
                 engine on top of Hadoop. The experimental evaluation
                 shows the potential of EXORD in achieving more than 10x
                 speedup while introducing minimal storage overheads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kahng:2016:IBN,
  author =       "Minsuk Kahng and Shamkant B. Navathe and John T.
                 Stasko and Duen Horng Polo Chau",
  title =        "Interactive browsing and navigation in relational
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1017--1028",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994520",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Although researchers have devoted considerable
                 attention to helping database users formulate queries,
                 many users still find it challenging to specify queries
                 that involve joining tables. To help users construct
                 join queries for exploring relational databases, we
                 propose ETable, a novel presentation data model that
                 provides users with a presentation-level interactive
                 view. This view compactly presents one-to-many and
                 many-to-many relationships within a single enriched
                 table by allowing a cell to contain a set of entity
                 references. Users can directly interact with this
                 enriched table to incrementally construct complex
                 queries and navigate databases on a conceptual
                 entity-relationship level. In a user study,
                 participants performed a range of database querying
                 tasks faster with ETable than with a commercial
                 graphical query builder. Subjective feedback about
                 ETable was also positive. All participants found that
                 ETable was easier to learn and helpful for exploring
                 databases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Borovica-Gajic:2016:CDA,
  author =       "Renata Borovica-Gaji{\'c} and Raja Appuswamy and
                 Anastasia Ailamaki",
  title =        "Cheap data analytics using cold storage devices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1029--1040",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994521",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Enterprise databases use storage tiering to lower
                 capital and operational expenses. In such a setting,
                 data waterfalls from an SSD-based high-performance tier
                 when it is ``hot'' (frequently accessed) to a
                 disk-based capacity tier and finally to a tape-based
                 archival tier when ``cold'' (rarely accessed). To
                 address the unprecedented growth in the amount of cold
                 data, hardware vendors introduced new devices named
                 Cold Storage Devices (CSD) explicitly targeted at cold
                 data workloads. With access latencies in tens of
                 seconds and cost/GB as low as \$0.01/GB/month, CSD
                 provide a middle ground between the low-latency (ms),
                 high-cost, HDD-based capacity tier, and high-latency
                 (min to h), low-cost, tape-based, archival tier. Driven
                 by the price/performance aspect of CSD, this paper
                 makes a case for using CSD as a replacement for both
                 capacity and archival tiers of enterprise databases.
                 Although CSD offer major cost savings, we show that
                 current database systems can suffer from severe
                 performance drop when CSD are used as a replacement for
                 HDD due to the mismatch between design assumptions made
                 by the query execution engine and actual storage
                 characteristics of the CSD. We then build a CSD-driven
                 query execution framework, called Skipper, that
                 modifies both the database execution engine and CSD
                 scheduling algorithms to be aware of each other. Using
                 results from our implementation of the architecture
                 based on PostgreSQL and OpenStack Swift, we show that
                 Skipper is capable of completely masking the high
                 latency overhead of CSD, thereby opening up CSD for
                 wider adoption as a storage tier for cheap data
                 analytics over cold data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shun:2016:PLG,
  author =       "Julian Shun and Farbod Roosta-Khorasani and Kimon
                 Fountoulakis and Michael W. Mahoney",
  title =        "Parallel local graph clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1041--1052",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994522",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph clustering has many important applications in
                 computing, but due to growing sizes of graphs, even
                 traditionally fast clustering methods such as spectral
                 partitioning can be computationally expensive for
                 real-world graphs of interest. Motivated partly by
                 this, so-called local algorithms for graph clustering
                 have received significant interest due to the fact that
                 they can find good clusters in a graph with work
                 proportional to the size of the cluster rather than
                 that of the entire graph. This feature has proven to be
                 crucial in making such graph clustering and many of its
                 downstream applications efficient in practice. While
                 local clustering algorithms are already faster than
                 traditional algorithms that touch the entire graph,
                 they are sequential and there is an opportunity to make
                 them even more efficient via parallelization. In this
                 paper, we show how to parallelize many of these
                 algorithms in the shared-memory multicore setting, and
                 we analyze the parallel complexity of these algorithms.
                 We present comprehensive experiments on large-scale
                 graphs showing that our parallel algorithms achieve
                 good parallel speedups on a modern multicore machine,
                 thus significantly speeding up the analysis of local
                 graph clusters in the very large-scale setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tong:2016:OMM,
  author =       "Yongxin Tong and Jieying She and Bolin Ding and Lei
                 Chen and Tianyu Wo and Ke Xu",
  title =        "Online minimum matching in real-time spatial data:
                 experiments and analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1053--1064",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994523",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, with the development of mobile Internet and
                 smartphones, the online minimum bipartite matching in
                 real-time spatial data (OMBM) problem becomes popular.
                 Specifically, given a set of service providers with
                 specific locations and a set of users who dynamically
                 appear one by one, the OMBM problem is to find a
                 maximum-cardinality matching with minimum total
                 distance following that once a user appears, s/he must
                 be immediately matched to an unmatched service
                 provider, which cannot be revoked, before subsequent
                 users arrive. To address this problem, existing studies
                 mainly focus on analyzing the worst-case competitive
                 ratios of the proposed online algorithms, but study on
                 the performance of the algorithms in practice is
                 absent. In this paper, we present a comprehensive
                 experimental comparison of the representative
                 algorithms of the OMBM problem. Particularly, we
                 observe a surprising result that the simple and
                 efficient greedy algorithm, which has been considered
                 as the worst due to its exponential worst-case
                 competitive ratio, is significantly more effective than
                 other algorithms. We investigate the results and
                 further show that the competitive ratio of the worst
                 case of the greedy algorithm is actually just a
                 constant, 3.195, in the average-case analysis. We try
                 to clarify a 25-year misunderstanding towards the
                 greedy algorithm and justify that the greedy algorithm
                 is not bad at all. Finally, we provide a uniform
                 implementation for all the algorithms of the OMBM
                 problem and clarify their strengths and weaknesses,
                 which can guide practitioners to select appropriate
                 algorithms for various scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Brunel:2016:IAH,
  author =       "Robert Brunel and Norman May and Alfons Kemper",
  title =        "Index-assisted hierarchical computations in
                 main-memory {RDBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1065--1076",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994524",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We address the problem of expressing and evaluating
                 computations on hierarchies represented as database
                 tables. Engine support for such computations is very
                 limited today, and so they are usually outsourced into
                 stored procedures or client code. Recently, data model
                 and SQL language extensions were proposed to
                 conveniently represent and work with hierarchies. On
                 that basis we introduce a concept of structural
                 grouping to relational algebra, provide concise syntax
                 to express a class of useful computations, and discuss
                 algorithms to evaluate them efficiently by exploiting
                 available indexing schemes. This extends the
                 versatility of RDBMS towards a great many use cases
                 dealing with hierarchical data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ohsaka:2016:DIA,
  author =       "Naoto Ohsaka and Takuya Akiba and Yuichi Yoshida and
                 Ken-ichi Kawarabayashi",
  title =        "Dynamic influence analysis in evolving networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1077--1088",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994525",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose the first real-time fully-dynamic index
                 data structure designed for influence analysis on
                 evolving networks. With this aim, we carefully redesign
                 the data structure of the state-of-the-art sketching
                 method introduced by Borgs et al., and construct
                 corresponding update algorithms. Using this index, we
                 present algorithms for two kinds of queries, influence
                 estimation and influence maximization, which are
                 strongly motivated by practical applications, such as
                 viral marketing. We provide a thorough theoretical
                 analysis, which guarantees the non-degeneracy of the
                 solution accuracy after an arbitrary number of updates.
                 Furthermore, we introduce a reachability-tree-based
                 technique and a skipping method, which greatly reduce
                 the time consumption required for edge/vertex deletions
                 and vertex additions, respectively, and counter-based
                 random number generators, which improve the space
                 efficiency. Experimental evaluations using real dynamic
                 networks with tens of millions of edges demonstrate the
                 efficiency, scalability, and accuracy of our proposed
                 indexing scheme. Specifically, it can reflect a graph
                 modification within a time of several orders of
                 magnitude smaller than that required to reconstruct an
                 index from scratch, estimate the influence spread of a
                 vertex set accurately within a millisecond, and select
                 highly influential vertices at least ten times faster
                 than state-of-the-art static algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tran:2016:DBO,
  author =       "Luan Tran and Liyue Fan and Cyrus Shahabi",
  title =        "Distance-based outlier detection in data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1089--1100",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994526",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Continuous outlier detection in data streams has
                 important applications in fraud detection, network
                 security, and public health. The arrival and departure
                 of data objects in a streaming manner impose new
                 challenges for outlier detection algorithms, especially
                 in time and space efficiency. In the past decade,
                 several studies have been performed to address the
                 problem of distance-based outlier detection in data
                 streams (DODDS), which adopts an unsupervised
                 definition and does not have any distributional
                 assumptions on data values. Our work is motivated by
                 the lack of comparative evaluation among the
                 state-of-the-art algorithms using the same datasets on
                 the same platform. We systematically evaluate the most
                 recent algorithms for DODDS under various stream
                 settings and outlier rates. Our extensive results show
                 that in most settings, the MCOD algorithm offers the
                 superior performance among all the algorithms,
                 including the most recent algorithm Thresh\_LEAP.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mountantonakis:2016:MLC,
  author =       "Michalis Mountantonakis and Yannis Tzitzikas",
  title =        "On measuring the lattice of commonalities among
                 several linked datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1101--1112",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994527",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A big number of datasets has been published according
                 to the principles of Linked Data and this number keeps
                 increasing. Although the ultimate objective is linking
                 and integration, it is not currently evident how
                 connected the current LOD cloud is. Measurements (and
                 indexes) that involve more than two datasets are not
                 available although they are important: (a) for
                 obtaining complete information about one particular URI
                 (or set of URIs) with provenance (b) for aiding dataset
                 discovery and selection, (c) for assessing the
                 connectivity between any set of datasets for quality
                 checking and for monitoring their evolution over time,
                 (d) for constructing visualizations that provide more
                 informative overviews. Since it would be prohibitively
                 expensive to perform all these measurements in a
                 na{\"\i}ve way, in this paper we introduce indexes (and
                 their construction algorithms) that can speedup such
                 tasks. In brief, we introduce (i) a namespace-based
                 prefix index, (ii) a sameAs catalog for computing the
                 symmetric and transitive closure of the owl:sameAs
                 relationships encountered in the datasets, (iii) a
                 semantics-aware element index (that exploits the
                 aforementioned indexes), and finally (iv) two
                 lattice-based incremental algorithms for speeding up
                 the computation of the intersection of URIs of any set
                 of datasets. We discuss the speedup obtained by the
                 introduced indexes and algorithms through comparative
                 results and finally we report measurements about
                 connectivity of the LOD cloud that have never been
                 carried out so far.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chang:2016:ORD,
  author =       "Zhao Chang and Dong Xie and Feifei Li",
  title =        "Oblivious {RAM}: a dissection and experimental
                 evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1113--1124",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994528",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many companies choose the cloud as their data and IT
                 infrastructure platform. The remote access of the data
                 brings the issue of trust. Despite the use of strong
                 encryption schemes, adversaries can still learn
                 valuable information regarding encrypted data by
                 observing the data access patterns. To that end, one
                 can hide the access patterns, which may leak sensitive
                 information, using Oblivious RAMs (ORAMs). Numerous
                 works have proposed different ORAM constructions, but
                 they have never been thoroughly compared against and
                 tested on large databases. There are also no open
                 source implementations of these schemes. These
                 limitations make it difficult for researchers and
                 practitioners to choose and adopt a suitable ORAM for
                 their applications. To address this issue, we provide a
                 thorough study over several practical ORAM
                 constructions, and implement them under the same
                 library. We perform extensive experiments to provide
                 insights into their performance characteristics with
                 respect to efficiency, scalability, and communication
                 cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kastrati:2016:OCP,
  author =       "Fisnik Kastrati and Guido Moerkotte",
  title =        "Optimization of conjunctive predicates for main memory
                 column stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1125--1136",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994529",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Optimization of queries with conjunctive predicates
                 for main memory databases remains a challenging task.
                 The traditional way of optimizing this class of queries
                 relies on predicate ordering based on selectivities or
                 ranks. However, the optimization of queries with
                 conjunctive predicates is a much more challenging task,
                 requiring a holistic approach in view of (1) an
                 accurate cost model that is aware of CPU architectural
                 characteristics such as branch (mis)prediction, (2) a
                 storage layer, allowing for a streamlined query
                 execution, (3) a common subexpression elimination
                 technique, minimizing column access costs, and (4) an
                 optimization algorithm able to pick the optimal plan
                 even in presence of a small (bounded) estimation error.
                 In this work, we embrace the holistic approach, and
                 show its superiority experimentally. Current approaches
                 typically base their optimization algorithms on at
                 least one of two assumptions: (1) the predicate
                 selectivities are assumed to be independent, (2) the
                 predicate costs are assumed to be constant. Our
                 approach is not based on these assumptions, as they in
                 general do not hold.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chothia:2016:EOM,
  author =       "Zaheer Chothia and John Liagouris and Frank McSherry
                 and Timothy Roscoe",
  title =        "Explaining outputs in modern data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1137--1148",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994530",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We report on the design and implementation of a
                 general framework for interactively explaining the
                 outputs of modern data-parallel computations, including
                 iterative data analytics. To produce explanations,
                 existing works adopt a naive backward tracing approach
                 which runs into known issues; naive backward tracing
                 may identify: (i) too much information that is
                 difficult to process, and (ii) not enough information
                 to reproduce the output, which hinders the logical
                 debugging of the program. The contribution of this work
                 is twofold. First, we provide methods to effectively
                 reduce the size of explanations based on the first
                 occurrence of a record in an iterative computation.
                 Second, we provide a general method for identifying
                 explanations that are sufficient to reproduce the
                 target output in arbitrary computations --- a problem
                 for which no viable solution existed until now. We
                 implement our approach on differential dataflow, a
                 modern high-throughput, low-latency dataflow platform.
                 We add a small (but extensible) set of rules to explain
                 each of its data-parallel operators, and we implement
                 these rules as differential dataflow operators
                 themselves. This choice allows our implementation to
                 inherit the performance characteristics of differential
                 dataflow, and results in a system that efficiently
                 computes and updates explanatory inputs even as the
                 inputs of the reference computation change. We evaluate
                 our system with various analytic tasks on real
                 datasets, and we show that it produces concise
                 explanations in tens of milliseconds, while remaining
                 faster --- up to two orders of magnitude --- than even
                 the best implementations that do not support
                 explanations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Buneman:2016:RGA,
  author =       "Peter Buneman and Slawek Staworko",
  title =        "{RDF} graph alignment with bisimulation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1149--1160",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994531",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We investigate the problem of aligning two RDF
                 databases, an essential problem in understanding the
                 evolution of ontologies. Our approaches address three
                 fundamental challenges: (1) the use of ``blank'' (null)
                 names, (2) ontology changes in which different names
                 are used to identify the same entity, and (3) small
                 changes in the data values as well as small changes in
                 the graph structure of the RDF database. We propose
                 approaches inspired by the classical notion of graph
                 bisimulation and extend them to capture the natural
                 metrics of edit distance on the data values and the
                 graph structure. We evaluate our methods on three
                 evolving curated data sets. Overall, our results show
                 that the proposed methods perform well and are
                 scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bursztyn:2016:TRA,
  author =       "Damian Bursztyn and Fran{\c{c}}ois Goasdou{\'e} and
                 Ioana Manolescu",
  title =        "Teaching an {RDBMS} about ontological constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1161--1172",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994532",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the presence of an ontology, query answers must
                 reflect not only data explicitly present in the
                 database, but also implicit data, which holds due to
                 the ontology, even though it is not present in the
                 database. A large and useful set of ontology languages
                 enjoys FOL reducibility of query answering: answering a
                 query can be reduced to evaluating a certain
                 first-order logic (FOL) formula (obtained from the
                 query and ontology) against only the explicit facts. We
                 present a novel query optimization framework for
                 ontology-based data access settings enjoying FOL
                 reducibility. Our framework is based on searching
                 within a set of alternative equivalent FOL queries,
                 i.e., FOL reformulations, one with minimal evaluation
                 cost when evaluated through a relational database
                 system. We apply this framework to the DL-Lite$_R$
                 Description Logic underpinning the W3C's OWL2 QL
                 ontology language, and demonstrate through experiments
                 its performance benefits when two leading SQL systems,
                 one open-source and one commercial, are used for
                 evaluating the FOL query reformulations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Simonini:2016:BLS,
  author =       "Giovanni Simonini and Sonia Bergamaschi and H. V.
                 Jagadish",
  title =        "{BLAST}: a loosely schema-aware meta-blocking approach
                 for entity resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1173--1184",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994533",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Identifying records that refer to the same entity is a
                 fundamental step for data integration. Since it is
                 prohibitively expensive to compare every pair of
                 records, blocking techniques are typically employed to
                 reduce the complexity of this task. These techniques
                 partition records into blocks and limit the comparison
                 to records co-occurring in a block. Generally, to deal
                 with highly heterogeneous and noisy data (e.g.
                 semi-structured data of the Web), these techniques rely
                 on redundancy to reduce the chance of missing matches.
                 Meta-blocking is the task of restructuring blocks
                 generated by redundancy-based blocking techniques,
                 removing superfluous comparisons. Existing
                 meta-blocking approaches rely exclusively on
                 schema-agnostic features. In this paper, we demonstrate
                 how ``loose'' schema information (i.e., statistics
                 collected directly from the data) can be exploited to
                 enhance the quality of the blocks in a holistic loosely
                 schema-aware (meta-)blocking approach that can be used
                 to speed up your favorite Entity Resolution algorithm.
                  We call it Blast (Blocking with Loosely-Aware Schema
                 Techniques). We show how Blast can automatically
                 extract this loose information by adopting a LSH-based
                 step for efficiently scaling to large datasets. We
                 experimentally demonstrate, on real-world datasets, how
                 Blast outperforms the state-of-the-art unsupervised
                 meta-blocking approaches, and, in many cases, also the
                 supervised one.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2016:LEI,
  author =       "Erkang Zhu and Fatemeh Nargesian and Ken Q. Pu and
                 Ren{\'e}e J. Miller",
  title =        "{LSH} ensemble: {Internet}-scale domain search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1185--1196",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994534",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of domain search where a domain
                 is a set of distinct values from an unspecified
                 universe. We use Jaccard set containment score, defined
                 as $ | Q \cap X | / | Q | $, as the measure of
                 relevance of a domain $X$ to a query domain $Q$. Our
                 choice of Jaccard set containment over Jaccard
                 similarity as a measure of relevance makes our work
                 particularly suitable for searching Open Data and data
                 on the web, as Jaccard similarity is known to have poor
                 performance over sets with large differences in their
                 domain sizes. We demonstrate that the domains found in
                 several real-life Open Data and web data repositories
                 show a power-law distribution over their domain sizes.
                 We present a new index structure, Locality Sensitive
                 Hashing (LSH) Ensemble, that solves the domain search
                 problem using set containment at Internet scale. Our
                 index structure and search algorithm cope with the data
                 volume and skew by means of data sketches using Minwise
                 Hashing and domain partitioning. Our index structure
                 does not assume a prescribed set of data values. We
                 construct a cost model that describes the accuracy of
                 LSH Ensemble with any given partitioning. This allows
                 us to formulate the data partitioning for LSH Ensemble
                 as an optimization problem. We prove that there exists
                 an optimal partitioning for any data distribution.
                 Furthermore, for datasets following a power-law
                 distribution, as observed in Open Data and Web data
                 corpora, we show that the optimal partitioning can be
                 approximated using equi-depth, making it particularly
                 efficient to use in practice. We evaluate our algorithm
                 using real data (Canadian Open Data and WDC Web Tables)
                  containing up to 262 million domains. The experiments
                 demonstrate that our index consistently outperforms
                 other leading alternatives in accuracy and performance.
                 The improvements are most dramatic for data with large
                 skew in the domain sizes. Even at 262 million domains,
                 our index sustains query performance with under 3
                 seconds response time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Konda:2016:MTBa,
  author =       "Pradap Konda and Sanjib Das and Paul Suganthan G. C.
                 and AnHai Doan and Adel Ardalan and Jeffrey R. Ballard
                 and Han Li and Fatemah Panahi and Haojun Zhang and Jeff
                 Naughton and Shishir Prasad and Ganesh Krishnan and
                 Rohit Deep and Vijay Raghavendra",
  title =        "{Magellan}: toward building entity matching management
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1197--1208",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994535",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity matching (EM) has been a long-standing
                 challenge in data management. Most current EM works
                 focus only on developing matching algorithms. We argue
                 that far more efforts should be devoted to building EM
                 systems. We discuss the limitations of current EM
                 systems, then present as a solution Magellan, a new
                 kind of EM systems. Magellan is novel in four important
                 aspects. (1) It provides how-to guides that tell users
                 what to do in each EM scenario, step by step. (2) It
                 provides tools to help users do these steps; the tools
                 seek to cover the entire EM pipeline, not just matching
                 and blocking as current EM systems do. (3) Tools are
                 built on top of the data analysis and Big Data stacks
                 in Python, allowing Magellan to borrow a rich set of
                 capabilities in data cleaning, IE, visualization,
                 learning, etc. (4) Magellan provides a powerful
                 scripting environment to facilitate interactive
                 experimentation and quick ``patching'' of the system.
                 We describe research challenges raised by Magellan,
                 then present extensive experiments with 44 students and
                 users at several organizations that show the promise of
                 the Magellan approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Saha:2016:AOD,
  author =       "Diptikalyan Saha and Avrilia Floratou and Karthik
                 Sankaranarayanan and Umar Farooq Minhas and Ashish R.
                 Mittal and Fatma {\"O}zcan",
  title =        "{ATHENA}: an ontology-driven system for natural
                 language querying over relational data stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1209--1220",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994536",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we present ATHENA, an ontology-driven
                 system for natural language querying of complex
                 relational databases. Natural language interfaces to
                 databases enable users easy access to data, without the
                 need to learn a complex query language, such as SQL.
                 ATHENA uses domain specific ontologies, which describe
                 the semantic entities, and their relationships in a
                 domain. We propose a unique two-stage approach, where
                 the input natural language query (NLQ) is first
                 translated into an intermediate query language over the
                 ontology, called OQL, and subsequently translated into
                 SQL. Our two-stage approach allows us to decouple the
                 physical layout of the data in the relational store
                 from the semantics of the query, providing physical
                 independence. Moreover, ontologies provide richer
                 semantic information, such as inheritance and
                 membership relations, that are lost in a relational
                 schema. By reasoning over the ontologies, our NLQ
                 engine is able to accurately capture the user intent.
                 We study the effectiveness of our approach using three
                 different workloads on top of geographical (GEO),
                 academic (MAS) and financial (FIN) data. ATHENA
                 achieves 100\% precision on the GEO and MAS workloads,
                 and 99\% precision on the FIN workload which operates
                 on a complex financial ontology. Moreover, ATHENA
                 attains 87.2\%, 88.3\%, and 88.9\% recall on the GEO,
                 MAS, and FIN workloads, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wesley:2016:ICC,
  author =       "Richard Wesley and Fei Xu",
  title =        "Incremental computation of common windowed holistic
                 aggregates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1221--1232",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994537",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Windowed aggregates are a SQL 2003 feature for
                 computing aggregates in moving windows. Common examples
                 include cumulative sums, local maxima and moving
                 quantiles. With the advent over the last few years of
                 easy-to-use data analytics tools, these functions are
                 becoming widely used by more and more analysts, but
                 some aggregates (such as local maxima) are much easier
                 to compute than others (such as moving quantiles).
                 Nevertheless, aggregates that are more difficult to
                 compute, like quantile and mode (or ``most frequent'')
                 provide more appropriate statistical summaries in the
                 common situation when a distribution is not Gaussian
                 and are an essential part of a data analysis toolkit.
                 Recent work has described highly efficient windowed
                 implementations of the most common aggregate function
                 categories, including distributive$^1$ aggregates such
                 as cumulative sums and algebraic aggregates such as
                 moving averages. But little has been published on
                 either the implementation or the performance of the
                 more complex holistic windowed aggregates such as
                 moving quantiles. This paper provides the first
                 in-depth study of how to efficiently implement the
                 three most common holistic windowed aggregates (count
                 distinct, mode and quantile) by reusing the aggregate
                 state between consecutive frames. Our measurements show
                 that these incremental algorithms generally achieve
                 improvements of about 10x over na{\"\i}ve
                 implementations, and that they can effectively detect
                 when to reset the internal state during extreme frame
                 variation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2016:ECS,
  author =       "Yixiang Fang and Reynold Cheng and Siqiang Luo and
                 Jiafeng Hu",
  title =        "Effective community search for large attributed
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "12",
  pages =        "1233--1244",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/2994509.2994538",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 6 16:21:12 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a graph $G$ and a vertex $ q \in G$, the
                 community search query returns a subgraph of $G$ that
                 contains vertices related to $q$. Communities, which
                 are prevalent in attributed graphs such as social
                 networks and knowledge bases, can be used in emerging
                 applications such as product advertisement and setting
                 up of social events. In this paper, we investigate the
                 attributed community query (or ACQ), which returns an
                 attributed community (AC) for an attributed graph. The
                 AC is a subgraph of $G$, which satisfies both structure
                 cohesiveness (i.e., its vertices are tightly connected)
                 and keyword cohesiveness (i.e., its vertices share
                 common keywords). The AC enables a better understanding
                 of how and why a community is formed (e.g., members of
                 an AC have a common interest in music, because they all
                 have the same keyword ``music''). An AC can be
                 ``personalized''; for example, an ACQ user may specify
                 that an AC returned should be related to some specific
                 keywords like ``research'' and ``sports''. To enable
                 efficient AC search, we develop the CL-tree index
                 structure and three algorithms based on it. We evaluate
                 our solutions on four large graphs, namely Flickr,
                 DBLP, Tencent, and DBpedia. Our results show that ACs
                 are more effective and efficient than existing
                 community retrieval approaches. Moreover, an AC
                 contains more precise and personalized information than
                 that of existing community search and detection
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lang:2016:TIA,
  author =       "Willis Lang and Karthik Ramachandra and David J.
                 DeWitt and Shize Xu and Qun Guo and Ajay Kalhan and
                 Peter Carlin",
  title =        "Not for the timid: on the impact of aggressive
                 over-booking in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1245--1256",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To lower hosting costs and service prices,
                 database-as-a-service (DBaaS) providers strive to
                 maximize cluster utilization without negatively
                 affecting their users' service experience. Some of the
                 most effective approaches for increasing service
                 efficiency result in the over-booking of the cluster
                 with user databases. For instance, one approach is to
                 reclaim cluster capacity from a database when it is
                 idle, temporarily re-using the capacity for some other
                 purpose, and over-booking the cluster's resources. Such
                 approaches are largely driven by policies that
                 determine when it is prudent to temporarily reclaim
                 capacity from an idle database. In this paper, we
                 examine policies that inherently tune the system's idle
                 sensitivity. Increased sensitivity to idleness leads to
                 aggressive over-booking while the converse leads to
                 conservative reclamation and lower utilization levels.
                 Aggressive over-booking also incurs a ``reserve''
                 capacity cost (for when we suddenly ``owe'' capacity to
                 previously idle databases.) We answer these key
                 questions in this paper: (1) how to find a ``good''
                 resource reclamation policy for a given DBaaS cluster
                 of users; and (2) how to forecast the needed near-term
                 reserve capacity. To help us answer these questions, we
                 used production user activity traces from Azure SQL DB
                 and built models of an over-booking mechanism. We show
                 that choosing the right policy can substantially boost
                 the efficiency of the service, facilitating lower
                 service prices via lower amortized infrastructure
                 costs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sevenich:2016:UDS,
  author =       "Martin Sevenich and Sungpack Hong and Oskar van Rest
                 and Zhe Wu and Jayanta Banerjee and Hassan Chafi",
  title =        "Using domain-specific languages for analytic graph
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1257--1268",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently graph has been drawing lots of attention both
                 as a natural data model that captures fine-grained
                 relationships between data entities and as a tool for
                 powerful data analysis that considers such
                 relationships. In this paper, we present a new graph
                 database system that integrates a robust graph storage
                 with an efficient graph analytics engine. Primarily,
                 our system adopts two domain-specific languages (DSLs),
                 one for describing graph analysis algorithms and the
                 other for graph pattern matching queries. Compared to
                 the API-based approaches in conventional graph
                 processing systems, the DSL-based approach provides
                 users with more flexible and intuitive ways of
                 expressing algorithms and queries. Moreover, the
                 DSL-based approach has significant performance benefits
                 as well, (1) by skipping (remote) API invocation
                 overhead and (2) by applying high-level optimization
                 from the compiler.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2016:KLM,
  author =       "Shaosu Liu and Bin Song and Sriharsha Gangam and
                 Lawrence Lo and Khaled Elmeleegy",
  title =        "{Kodiak}: leveraging materialized views for very
                 low-latency analytics over high-dimensional web-scale
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1269--1280",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Turn's online advertising campaigns produce petabytes
                 of data. This data is composed of trillions of events,
                 e.g. impressions, clicks, etc., spanning multiple
                 years. In addition to a timestamp, each event includes
                 hundreds of fields describing the user's attributes,
                 campaign's attributes, attributes of where the ad was
                 served, etc. Advertisers need advanced analytics to
                 monitor their running campaigns' performance, as well
                 as to optimize future campaigns. This involves slicing
                 and dicing the data over tens of dimensions over
                 arbitrary time ranges. Many of these queries need to
                 power the web portal to provide reports and dashboards.
                 For an interactive response time, they have to have
                 tens of milliseconds latency. At Turn's scale of
                 operations, no existing system was able to deliver this
                 performance in a cost effective manner. Kodiak, a
                 distributed analytical data platform for web-scale
                 high-dimensional data, was built to serve this need. It
                 relies on pre-computations to materialize thousands of
                 views to serve these advanced queries. These views are
                 partitioned and replicated across Kodiak's storage
                 nodes for scalability and reliability. They are system
                 maintained as new events arrive. At query time, the
                 system auto-selects the most suitable view to serve
                 each query. Kodiak has been used in production for over
                 a year. It hosts 2490 views for over three petabytes of
                 raw data serving over 200K queries daily. It has median
                 and 99\% query latencies of 8 ms and 252 ms
                 respectively. Our experiments show that its query
                 latency is 3 orders of magnitude faster than leading
                 big data platforms on head-to-head comparisons using
                 Turn's query workload. Moreover, Kodiak uses 4 orders
                 of magnitude less resources to run the same workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sharma:2016:GRT,
  author =       "Aneesh Sharma and Jerry Jiang and Praveen Bommannavar
                 and Brian Larson and Jimmy Lin",
  title =        "{GraphJet}: real-time content recommendations at
                 {Twitter}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1281--1292",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents GraphJet, a new graph-based system
                 for generating content recommendations at Twitter. As
                 motivation, we trace the evolution of our formulation
                 and approach to the graph recommendation problem,
                 embodied in successive generations of systems. Two
                 trends can be identified: supplementing batch with
                 real-time processing and a broadening of the scope of
                 recommendations from users to content. Both of these
                  trends come together in GraphJet, an in-memory graph
                 processing engine that maintains a real-time bipartite
                 interaction graph between users and tweets. The storage
                 engine implements a simple API, but one that is
                 sufficiently expressive to support a range of
                 recommendation algorithms based on random walks that we
                 have refined over the years. Similar to Cassovary, a
                 previous graph recommendation engine developed at
                 Twitter, GraphJet assumes that the entire graph can be
                 held in memory on a single server. The system organizes
                 the interaction graph into temporally-partitioned index
                 segments that hold adjacency lists. GraphJet is able to
                 support rapid ingestion of edges while concurrently
                 serving lookup queries through a combination of compact
                 edge encoding and a dynamic memory allocation scheme
                 that exploits power-law characteristics of the graph.
                 Each GraphJet server ingests up to one million graph
                 edges per second, and in steady state, computes up to
                 500 recommendations per second, which translates into
                 several million edge read operations per second.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ma:2016:DFP,
  author =       "Edward Ma and Vishrut Gupta and Meichun Hsu and
                 Indrajit Roy",
  title =        "\pkg{dmapply}: a functional primitive to express
                 distributed machine learning algorithms in {R}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1293--1304",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/s-plus.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Due to R's popularity as a data-mining tool, many
                 distributed systems expose an R-based API to users who
                 need to build a distributed application in R. As a
                 result, data scientists have to learn to use different
                 interfaces such as RHadoop, SparkR, Revolution R's
                 ScaleR, and HPE's Distributed R. Unfortunately, these
                 interfaces are custom, non-standard, and difficult to
                 learn. Not surprisingly, R applications written in one
                 framework do not work in another, and each backend
                 infrastructure has spent redundant effort in
                 implementing distributed machine learning algorithms.
                 Working with the members of R-core, we have created ddR
                 (Distributed Data structures in R), a unified system
                 that works across different distributed frameworks. In
                 ddR, we introduce a novel programming primitive called
                 dmapply that executes functions on distributed data
                 structures. The dmapply primitive encapsulates
                 different computation patterns: from function and data
                 broadcast to pair-wise communication. We show that
                 dmapply is powerful enough to express algorithms that
                 fit the statistical query model, which includes many
                 popular machine learning algorithms, as well as
                 applications written in MapReduce. We have integrated
                 ddR with many backends, such as R's single-node
                 parallel framework, multi-node SNOW framework, Spark,
                 and HPE Distributed R, with few or no modifications to
                 any of these systems. We have also implemented multiple
                 machine learning algorithms which are not only portable
                 across different distributed systems, but also have
                 performance comparable to the ``native''
                 implementations on the backends. We believe that ddR
                 will standardize distributed computing in R, just like
                 the SQL interface has standardized how relational data
                 is manipulated.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) industrial paper: Cubrick, Facebook's distributed
%%% in-memory multidimensional DBMS for interactive OLAP analytics.
@Article{Pedreira:2016:CIM,
  author =       "Pedro Pedreira and Chris Croswhite and Luis Bona",
  title =        "{Cubrick}: indexing millions of records per second for
                 interactive analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1305--1316",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper describes the architecture and design of
                 Cubrick, a distributed multidimensional in-memory DBMS
                 suited for interactive analytics over highly dynamic
                 datasets. Cubrick has a strictly multidimensional data
                 model composed of cubes, dimensions and metrics,
                 supporting sub-second OLAP operations such as slice and
                 dice, roll-up and drill-down over terabytes of data.
                 All data stored in Cubrick is range partitioned by
                 every dimension and stored within containers called
                 bricks in an unordered and sparse fashion, providing
                 high data ingestion rates and indexed access through
                 any combination of dimensions. In this paper, we
                 describe details about Cubrick's internal data
                 structures, distributed model, query execution engine
                 and a few details about the current implementation.
                 Finally, we present results from a thorough
                 experimental evaluation that leveraged datasets and
                 queries collected from a few internal Cubrick
                 deployments at Facebook.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): corrected author-name typos in the source metadata
%%% ("Manhardto" -> "Manhardt", "Chafio" -> "Chafi"; cf. the published
%%% LDBC Graphalytics author list) and brace-protected the benchmark
%%% name "Graphalytics" in the title, per this file's convention for
%%% proper nouns.
@Article{Iosup:2016:LGB,
  author =       "Alexandru Iosup and Tim Hegeman and Wing Lung Ngai and
                 Stijn Heldens and Arnau Prat-P{\'e}rez and Thomas
                 Manhardt and Hassan Chafi and Mihai Capota and
                 Narayanan Sundaram and Michael Anderson and Ilie
                 Gabriel Tanase and Yinglong Xia and Lifeng Nai and
                 Peter Boncz",
  title =        "{LDBC Graphalytics}: a benchmark for large-scale graph
                 analysis on parallel and distributed platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1317--1328",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper we introduce LDBC Graphalytics, a new
                 industrial-grade benchmark for graph analysis
                 platforms. It consists of six deterministic algorithms,
                 standard datasets, synthetic dataset generators, and
                 reference output, that enable the objective comparison
                 of graph analysis platforms. Its test harness produces
                 deep metrics that quantify multiple kinds of system
                 scalability, such as horizontal/vertical and
                 weak/strong, and of robustness, such as failures and
                 performance variability. The benchmark comes with
                 open-source software for generating data and monitoring
                 performance. We describe and analyze six
                 implementations of the benchmark (three from the
                 community, three from the industry), providing insights
                 into the strengths and weaknesses of the platforms. Key
                 to our contribution, vendors perform the tuning and
                 benchmarking of their platforms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) paper on managing numerical-simulation output in an
%%% array DBMS (SciDB), versus row-store and column-store systems.
@Article{Lustosa:2016:DSS,
  author =       "Hermano Lustosa and Fabio Porto and Patrick Valduriez
                 and Pablo Blanco",
  title =        "Database system support of simulation data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1329--1340",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Supported by increasingly efficient HPC
                 infra-structure, numerical simulations are rapidly
                 expanding to fields such as oil and gas, medicine and
                 meteorology. As simulations become more precise and
                 cover longer periods of time, they may produce files
                 with terabytes of data that need to be efficiently
                 analyzed. In this paper, we investigate techniques for
                 managing such data using an array DBMS. We take
                 advantage of multidimensional arrays that nicely models
                 the dimensions and variables used in numerical
                 simulations. However, a naive approach to map
                 simulation data files may lead to sparse arrays,
                 impacting query response time, in particular, when the
                 simulation uses irregular meshes to model its physical
                 domain. We propose efficient techniques to map
                 coordinate values in numerical simulations to evenly
                 distributed cells in array chunks with the use of
                 equi-depth histograms and space-filling curves. We
                 implemented our techniques in SciDB and, through
                 experiments over real-world data, compared them with
                 two other approaches: row-store and column-store DBMS.
                 The results indicate that multidimensional arrays and
                 column-stores are much faster than a traditional
                 row-store system for queries over a larger amount of
                 simulation data. They also help identifying the
                 scenarios where array DBMSs are most efficient, and
                 those where they are outperformed by column-stores.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): "IBM Streams" is the product name (the abstract uses
%%% it); the title now brace-protects it as a unit so {Streams} keeps
%%% its capital under sentence-casing styles.
@Article{Jacques-Silva:2016:CRG,
  author =       "Gabriela Jacques-Silva and Fang Zheng and Daniel
                 Debrunner and Kun-Lung Wu and Victor Dogaru and Eric
                 Johnson and Michael Spicer and Ahmet Erdem
                 Sariy{\"u}ce",
  title =        "Consistent regions: guaranteed tuple processing in
                 {IBM Streams}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1341--1352",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Guaranteed tuple processing has become critically
                 important for many streaming applications. This paper
                 describes how we enabled IBM Streams, an
                 enterprise-grade stream processing system, to provide
                 data processing guarantees. Our solution goes from
                 language-level abstractions to a runtime protocol. As a
                 result, with a couple of simple annotations at the
                 source code level, IBM Streams developers can define
                 consistent regions, allowing any subgraph of their
                 streaming application to achieve guaranteed tuple
                 processing. At runtime, a consistent region
                 periodically executes a variation of the Chandy-Lamport
                 snapshot algorithm to establish a consistent global
                 state for that region. The coupling of consistent
                 states with data replay enables guaranteed tuple
                 processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) industrial paper: Teradata's hybrid row-column store
%%% partitioning. (\reg is a registered-trademark macro; presumably
%%% defined in the consumer's preamble -- see file header conventions.)
@Article{Al-Kateb:2016:HRC,
  author =       "Mohammed Al-Kateb and Paul Sinclair and Grace Au and
                 Carrie Ballinger",
  title =        "Hybrid row-column partitioning in {Teradata\reg}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1353--1364",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data partitioning is an indispensable ingredient of
                 database systems due to the performance improvement it
                 can bring to any given mixed workload. Data can be
                 partitioned horizontally or vertically. While some
                 commercial proprietary and open source database systems
                 have one flavor or mixed flavors of these partitioning
                 forms, Teradata Database offers a unique hybrid
                 row-column store solution that seamlessly combines both
                 of these partitioning schemes. The key feature of this
                 hybrid solution is that either row, column, or combined
                 partitions are all stored and handled in the same way
                 internally by the underlying file system storage layer.
                 In this paper, we present the main characteristics and
                 explain the implementation approach of Teradata's
                 row-column store. We also discuss query optimization
                 techniques applicable specifically to partitioned
                 tables. Furthermore, we present a performance study
                 that demonstrates how different partitioning options
                 impact the performance of various queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) industrial paper: TrafficDB, HERE's shared-memory
%%% store backing traffic-aware routing services.
@Article{Fernandes:2016:THH,
  author =       "Ricardo Fernandes and Piotr Zaczkowski and Bernd
                 G{\"o}ttler and Conor Ettinoffe and Anis Moussa",
  title =        "{TrafficDB}: {HERE}'s high performance shared-memory
                 data store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1365--1376",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "HERE's traffic-aware services enable route planning
                 and traffic visualisation on web, mobile and connected
                 car applications. These services process thousands of
                 requests per second and require efficient ways to
                 access the information needed to provide a timely
                 response to end-users. The characteristics of road
                 traffic information and these traffic-aware services
                 require storage solutions with specific performance
                 features. A route planning application utilising
                 traffic congestion information to calculate the optimal
                 route from an origin to a destination might hit a
                 database with millions of queries per second. However,
                 existing storage solutions are not prepared to handle
                 such volumes of concurrent read operations, as well as
                 to provide the desired vertical scalability. This paper
                 presents TrafficDB, a shared-memory data store,
                 designed to provide high rates of read operations,
                 enabling applications to directly access the data from
                 memory. Our evaluation demonstrates that TrafficDB
                 handles millions of read operations and provides
                 near-linear scalability on multi-core machines, where
                 additional processes can be spawned to increase the
                 systems' throughput without a noticeable impact on the
                 latency of querying the data store. The paper concludes
                 with a description of how TrafficDB improved the
                 performance of our traffic-aware services running in
                 production.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) industrial paper: Comdb2, Bloomberg's geo-replicated,
%%% highly available relational DBMS.
@Article{Scotti:2016:CBH,
  author =       "Alex Scotti and Mark Hannum and Michael Ponomarenko
                 and Dorin Hogea and Akshat Sikarwar and Mohit Khullar
                 and Adi Zaimi and James Leddy and Rivers Zhang and
                 Fabio Angius and Lingzhi Deng",
  title =        "{Comdb2}: {Bloomberg}'s highly available relational
                 database system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1377--1388",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Comdb2 is a distributed database system designed for
                 geographical replication and high availability. In
                 contrast with the latest trends in this field, Comdb2
                 offers full transactional support, a standard
                 relational model, and the expressivity of SQL.
                 Moreover, the system allows for rich stored procedures
                 using a dialect of Lua. Comdb2 implements a
                 serializable system in which reads from any node always
                 return current values. Comdb2 provides transparent High
                 Availability through built-in service discovery and
                 sophisticated retry logic embedded in the standard API.
                 In addition to the relational data model, Comdb2
                 implements queues for publisher-to-subscriber message
                 delivery. Queues can be combined with table triggers
                 for time-consistent log distribution, providing
                 functionality commonly needed in modern OLTP. In this
                 paper we give an overview of our last twelve years of
                 work. We focus on the design choices that have made
                 Comdb2 the primary database solution within our
                 company, Bloomberg LP (BLP).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) industrial paper: architecture of the Aerospike
%%% (formerly Citrusleaf) real-time operational DBMS.
@Article{Srinivasan:2016:AAR,
  author =       "V. Srinivasan and Brian Bulkowski and Wei-Ling Chu and
                 Sunil Sayyaparaju and Andrew Gooding and Rajkumar Iyer
                 and Ashish Shinde and Thomas Lopatic",
  title =        "{Aerospike}: architecture of a real-time operational
                 {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1389--1400",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we describe the solutions developed to
                 address key technical challenges encountered while
                 building a distributed database system that can
                 smoothly handle demanding real-time workloads and
                 provide a high level of fault tolerance. Specifically,
                 we describe schemes for the efficient clustering and
                 data partitioning for the automatic scale out of
                 processing across multiple nodes and for optimizing the
                 usage of CPUs, DRAM, SSDs and networks to efficiently
                 scale up performance on one node. The techniques
                 described here were used to develop Aerospike (formerly
                 Citrusleaf), a high performance distributed database
                 system built to handle the needs of today's interactive
                 online services. Most real-time decision systems that
                 use Aerospike require very high scale and need to make
                 decisions within a strict SLA by reading from, and
                 writing to, a database containing billions of data
                 items at a rate of millions of operations per second
                 with sub-millisecond latency. For over five years,
                 Aerospike has been continuously used in over a hundred
                 successful production deployments, as many enterprises
                 have discovered that it can substantially enhance their
                 user experience.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): corrected the surname misspelling "Jimsheleishvilli"
%%% -> "Jimsheleishvili" (standard Georgian -shvili suffix; matches the
%%% published author list).
@Article{Chen:2016:MQO,
  author =       "Jack Chen and Samir Jindel and Robert Walzer and
                 Rajkumar Sen and Nika Jimsheleishvili and Michael
                 Andrews",
  title =        "The {MemSQL} query optimizer: a modern optimizer for
                 real-time analytics in a distributed database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1401--1412",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Real-time analytics on massive datasets has become a
                 very common need in many enterprises. These
                 applications require not only rapid data ingest, but
                 also quick answers to analytical queries operating on
                 the latest data. MemSQL is a distributed SQL database
                 designed to exploit memory-optimized, scale-out
                 architecture to enable real-time transactional and
                 analytical workloads which are fast, highly concurrent,
                 and extremely scalable. Many analytical queries in
                 MemSQL's customer workloads are complex queries
                 involving joins, aggregations, sub-queries, etc. over
                 star and snowflake schemas, often ad-hoc or produced
                 interactively by business intelligence tools. These
                 queries often require latencies of seconds or less, and
                 therefore require the optimizer to not only produce a
                 high quality distributed execution plan, but also
                 produce it fast enough so that optimization time does
                 not become a bottleneck. In this paper, we describe the
                 architecture of the MemSQL Query Optimizer and the
                 design choices and innovations which enable it quickly
                 produce highly efficient execution plans for complex
                 distributed queries. We discuss how query rewrite
                 decisions oblivious of distribution cost can lead to
                 poor distributed execution plans, and argue that to
                 choose high-quality plans in a distributed database,
                 the optimizer needs to be distribution-aware in
                 choosing join plans, applying query rewrites, and
                 costing plans. We discuss methods to make join
                 enumeration faster and more effective, such as a
                 rewrite-based approach to exploit bushy joins in
                 queries involving multiple star schemas without
                 sacrificing optimization time. We demonstrate the
                 effectiveness of the MemSQL optimizer over queries from
                 the TPC-H benchmark and a real customer workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) industrial paper: Nitro, the in-memory storage engine
%%% behind Couchbase 4.5 Global Secondary Indexes.
@Article{Lakshman:2016:NFS,
  author =       "Sarath Lakshman and Sriram Melkote and John Liang and
                 Ravi Mayuram",
  title =        "{Nitro}: a fast, scalable in-memory storage engine for
                 {NoSQL} global secondary index",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1413--1424",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present Nitro, a high-performance in-memory
                 key--value storage engine used in Couchbase 4.5 Global
                 Secondary Indexes. The Nitro storage engine is well
                 suited for the recent hardware trends like large
                 amounts of memory and many CPU cores. The storage
                 engine leverages latch-free data structures and tries
                 to achieve linear scalability for the index read-write
                 operations. The Nitro storage engine offers concurrent
                 readers and writers, lightweight database snapshots,
                 stable scan, backup and recovery operations. We
                 integrated Nitro into the Couchbase Global Secondary
                 Indexes (GSI) and observed significant improvement in
                 performance compared to our disk oriented storage
                 engine configured with the same amount of memory for
                 buffer cache. On a 32 core machine, we observed an
                 end-to-end GSI server insertion throughput of 1,650,000
                 entries/sec and index update throughput of 822,000
                 entries/sec. A single instance of Nitro data structure
                 running on a 40 core machine achieved a peak insertion
                 throughput of 4 million index entries/sec and entry
                 lookup throughput of 10 million lookups/sec.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% NOTE(review): brace-protected and capitalized the proper noun
%%% "Spark" in the title (it was lowercased and unprotected), matching
%%% this file's convention for product names such as {SystemML}.
@Article{Boehm:2016:SDM,
  author =       "Matthias Boehm and Michael W. Dusenberry and Deron
                 Eriksson and Alexandre V. Evfimievski and Faraz Makari
                 Manshadi and Niketan Pansare and Berthold Reinwald and
                 Frederick R. Reiss and Prithviraj Sen and Arvind C.
                 Surve and Shirish Tatikonda",
  title =        "{SystemML}: declarative machine learning on {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1425--1436",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The rising need for custom machine learning (ML)
                 algorithms and the growing data sizes that require the
                 exploitation of distributed, data-parallel frameworks
                 such as MapReduce or Spark, pose significant
                 productivity challenges to data scientists. Apache
                 SystemML addresses these challenges through declarative
                 ML by (1) increasing the productivity of data
                 scientists as they are able to express custom
                 algorithms in a familiar domain-specific language
                 covering linear algebra primitives and statistical
                 functions, and (2) transparently running these ML
                 algorithms on distributed, data-parallel frameworks by
                 applying cost-based compilation techniques to generate
                 efficient, low-level execution plans with in-memory
                 single-node and large-scale distributed operations.
                 This paper describes SystemML on Apache Spark, end to
                 end, including insights into various optimizer and
                 runtime techniques as well as performance
                 characteristics. We also share lessons learned from
                 porting SystemML to Spark and declarative ML in
                 general. Finally, SystemML is open-source, which allows
                 the database community to leverage it as a testbed for
                 further research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) industrial paper: Dynamic In-Memory Expressions (DIMEs)
%%% in Oracle Database In-Memory 12.2.
@Article{Mishra:2016:AAD,
  author =       "Aurosish Mishra and Shasank Chavan and Allison
                 Holloway and Tirthankar Lahiri and Zhen Hua Liu and
                 Sunil Chakkappen and Dennis Lui and Vinita Subramanian
                 and Ramesh Kumar and Maria Colgan and Jesse Kamp and
                 Niloy Mukherjee and Vineet Marwah",
  title =        "Accelerating analytics with dynamic in-memory
                 expressions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1437--1448",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Oracle Database In-Memory (DBIM) accelerates analytic
                 workload performance by orders of magnitude through an
                 in-memory columnar format utilizing techniques such as
                 SIMD vector processing, in-memory storage indexes, and
                 optimized predicate evaluation and aggregation. With
                 Oracle Database 12.2, Database In-Memory is further
                 enhanced to accelerate analytic processing through a
                 novel lightweight mechanism known as Dynamic In-Memory
                 Expressions (DIMEs). The DIME mechanism automatically
                 detects frequently occurring expressions in a query
                 workload, and then creates highly optimized,
                 transactionally consistent, in-memory columnar
                 representations of these expression results. At
                 runtime, queries can directly access these DIMEs, thus
                 avoiding costly expression evaluations. Furthermore,
                 all the optimizations introduced in DBIM can apply
                 directly to DIMEs. Since DIMEs are purely in-memory
                 structures, no changes are required to the underlying
                 tables. We show that DIMEs can reduce query elapsed
                 times by several orders of magnitude without the need
                 for costly pre-computed structures such as computed
                 columns or materialized views or cubes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 9(13) demo paper: GARUDA, large-scale mining of statistically
%%% significant connected subgraphs. (Abstract text, including "upto",
%%% is quoted verbatim from the source.)
@Article{Bhadange:2016:GSL,
  author =       "Satyajit Bhadange and Akhil Arora and Arnab
                 Bhattacharya",
  title =        "{GARUDA}: a system for large-scale mining of
                 statistically significant connected subgraphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1449--1452",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Unraveling ``interesting'' subgraphs corresponding to
                 disease/crime hotspots or characterizing habitation
                 shift patterns is an important graph mining task. With
                 the availability and growth of large-scale real-world
                 graphs, mining for such subgraphs has become the need
                 of the hour for graph miners as well as non-technical
                 end-users. In this demo, we present GARUDA, a system
                 capable of mining large-scale graphs for statistically
                 significant subgraphs in a scalable manner, and
                 provide: (1) a detailed description of the various
                 features and user-friendly GUI of GARUDA; (2) a brief
                 description of the system architecture; and (3) a
                 demonstration scenario for the audience. The
                 demonstration showcases one real graph mining task as
                 well as its ability to scale to large real graphs,
                 portraying speed-ups of upto 8--10 times over the
                 state-of-the-art MSCS algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2016:VVT,
  author =       "Huan Li and Hua Lu and Xin Chen and Gang Chen and Ke
                 Chen and Lidan Shou",
  title =        "{Vita}: a versatile toolkit for generating indoor
                 mobility data for real-world buildings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1453--1456",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate a generic, user-configurable toolkit
                 for generating different types of indoor mobility data
                 for real-world buildings. Our prototype generates the
                 desired data in a three-layer pipeline. The
                 Infrastructure Layer accepts industry-standard digital
                 building information (DBI) files to generate the host
                 indoor environment, allowing users to configure the
                 generation of a variety of positioning devices, such as
                 Wi-Fi, Bluetooth, RFID, etc. The Moving Object Layer
                 offers the functionality of defining objects or
                 trajectories, with configurable indoor moving patterns,
                 distribution models, and sampling frequencies. The
                 Positioning Layer generates synthetic signal strength
                 measurements known as raw RSSI$^1$ measurements
                 according to the positioning device data and trajectory
                 data generated at relevant layers. It also generates
                 different types of indoor positioning data through the
                 customization of all typical indoor positioning methods
                 on the raw RSSI data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bagan:2016:GFW,
  author =       "Guillaume Bagan and Angela Bonifati and Radu Ciucanu
                 and George H. L. Fletcher and Aur{\'e}lien Lemay and
                 Nicky Advokaat",
  title =        "Generating flexible workloads for graph databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1457--1460",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph data management tools are nowadays evolving at a
                 great pace. Key drivers of progress in the design and
                 study of data intensive systems are solutions for
                 synthetic generation of data and workloads, for use in
                 empirical studies. Current graph generators, however,
                 provide limited or no support for workload generation
                 or are limited to fixed use-cases. Towards addressing
                 these limitations, we demonstrate gMark, the first
                 domain- and query language-independent framework for
                 synthetic graph and query workload generation. Its
                 novel features are: (i) fine-grained control of graph
                 instance and query workload generation via expressive
                 user-defined schemas; (ii) the support of expressive
                 graph query languages, including recursion among other
                 features; and, (iii) selectivity estimation of the
                 generated queries. During the demonstration, we will
                 showcase the highly tunable generation of graphs and
                 queries through various user-defined schemas and
                 targeted selectivities, and the variety of supported
                 practical graph query languages. We will also show a
                 performance comparison of four state-of-the-art graph
                 database engines, which helps us understand their
                 current strengths and desirable future extensions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2016:AQP,
  author =       "Xiaofeng Zhou and Yang Chen and Daisy Zhe Wang",
  title =        "{ArchimedesOne}: query processing over probabilistic
                 knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1461--1464",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Knowledge bases are becoming increasingly important in
                 structuring and representing information from the web.
                 Meanwhile, web-scale information poses significant
                 scalability and quality challenges to knowledge base
                 systems. To address these challenges, we develop a
                 probabilistic knowledge base system, ArchimedesOne, by
                 scaling up the knowledge expansion and statistical
                 inference algorithms. We design a web interface for
                 users to query and update large knowledge bases. In
                 this paper, we demonstrate the ArchimedesOne system to
                 showcase its efficient query and inference engines. The
                 demonstration serves two purposes: (1) to provide an
                 interface for users to interact with ArchimedesOne
                 through load, search, and update queries; and (2) to
                 validate our approaches of knowledge expansion by
                 applying inference rules in batches using relational
                 operations and query-driven inference by focusing
                 computation on the query facts. We compare
                 ArchimedesOne with state-of-the-art approaches using
                 two knowledge bases: NELL-sports with 4.5 million facts
                 and Reverb-Sherlock with 15 million facts.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Milo:2016:RIR,
  author =       "Tova Milo and Slava Novgorodov and Wang-Chiew Tan",
  title =        "{Rudolf}: interactive rule refinement system for fraud
                 detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1465--1468",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Credit card frauds are unauthorized transactions that
                 are made or attempted by a person or an organization
                 that is not authorized by the card holders. In addition
                 to machine learning-based techniques, credit card
                 companies often employ domain experts to manually
                 specify rules that exploit domain knowledge for
                 improving the detection process. Over time, however, as
                 new (fraudulent and legitimate) transactions arrive,
                 these rules need to be updated and refined to capture
                 the evolving (fraud and legitimate) activity patterns.
                 The goal of the RUDOLF system that is demonstrated here
                 is to guide and assist domain experts in this
                 challenging task. RUDOLF automatically determines a
                 best set of candidate adaptations to existing rules to
                 capture all fraudulent transactions and, respectively,
                 omit all legitimate transactions. The proposed
                 modifications can then be further refined by domain
                 experts based on their domain knowledge, and the
                 process can be repeated until the experts are satisfied
                 with the resulting rules. Our experimental results on
                 real-life datasets demonstrate the effectiveness and
                 efficiency of our approach. We showcase RUDOLF with two
                 demonstration scenarios: detecting credit card frauds
                 and network attacks. Our demonstration will engage the
                 VLDB audience by allowing them to play the role of a
                 security expert, a credit card fraudster, or a network
                 attacker.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Maccioni:2016:GDB,
  author =       "Antonio Maccioni and Matteo Collina",
  title =        "Graph databases in the browser: using {LevelGraph} to
                 explore {New Delhi}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1469--1472",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The pervasiveness of graphs on the Web is growing;
                 however, the difficulty of managing complex graph
                 structures curbs the development of web-oriented
                 applications that embed network data. The open source
                 project, LevelGraph, aims to overcome the obstacles
                 that web developers face with graph data management.
                 LevelGraph is an easy-to-use graph database layer for
                 web applications. To demonstrate various capabilities
                 of the system, we developed a web-based application
                 that utilizes a graph database of a tourist network in
                 New Delhi. The application allows users to move around
                 the city while LevelGraph executes graph queries on the
                 underlying database. In this demonstration, we show how
                 LevelGraph's features facilitate development and
                 maintenance of web applications that embed graph
                 data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sellam:2016:ZCQ,
  author =       "Thibault Sellam and Martin Kersten",
  title =        "{Ziggy}: characterizing query results for data
                 explorers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1473--1476",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data exploration has received much attention during
                 the last few years. The aim is to learn interesting new
                 facts from a possibly unfamiliar data set. Typically,
                 explorers operate by trial and error: they write a
                 query, inspect the results and refine their
                 specifications accordingly. In this demo proposal, we
                 present Ziggy, a system to help them understand their
                 query results. Ziggy's aim is to complement an existing
                 exploration system. It assumes that users already have
                 a query in mind, but they do not know what is
                 interesting about it. To assist them, it detects
                 characteristic views, that is, small sets of columns on
                 which the tuples in the results are different from
                 those in the rest of the database. Thanks to these
                 views, our explorers can understand why their selection
                 is unique and make more informed exploration
                 decisions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sellam:2016:BMN,
  author =       "Thibault Sellam and Robin Cijvat and Richard
                 Koopmanschap and Martin Kersten",
  title =        "{Blaeu}: mapping and navigating large tables with
                 cluster analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1477--1480",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Blaeu is an interactive database exploration tool. Its
                 aim is to guide casual users through large data tables,
                 ultimately triggering insights and serendipity. To do
                 so, it relies on a double cluster analysis mechanism.
                 It clusters the data vertically: it detects themes,
                 groups of mutually dependent columns that highlight one
                 aspect of the data. Then it clusters the data
                 horizontally. For each theme, it produces a data map,
                 an interactive visualization of the clusters in the
                 table. The data maps summarize the data. They provide a
                 visual synopsis of the clusters, as well as facilities
                 to inspect their content and annotate them. But they
                 also let the users navigate further. Our explorers can
                 change the active set of columns or drill down into the
                 clusters to refine their selection. Our prototype is
                 fully operational, ready to deliver insights from
                 complex databases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{El-Roby:2016:SQR,
  author =       "Ahmed El-Roby and Khaled Ammar and Ashraf Aboulnaga
                 and Jimmy Lin",
  title =        "{Sapphire}: querying {RDF} data made simple",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1481--1484",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is currently a large amount of publicly
                 accessible structured data available as RDF data sets.
                 For example, the Linked Open Data (LOD) cloud now
                 consists of thousands of RDF data sets with over 30
                 billion triples, and the number and size of the data
                 sets is continuously growing. Many of the data sets in
                 the LOD cloud provide public SPARQL endpoints to allow
                 issuing queries over them. These endpoints enable
                 users to retrieve data using precise and highly
                 expressive SPARQL queries. However, in order to do so,
                 the user must have sufficient knowledge about the data
                 sets that she wishes to query, that is, the structure
                 of data, the vocabulary used within the data set, the
                 exact values of literals, their data types, etc. Thus,
                 while SPARQL is powerful, it is not easy to use. An
                 alternative to SPARQL that does not require as much
                 prior knowledge of the data is some form of keyword
                 search over the structured data. Keyword search queries
                 are easy to use, but inherently ambiguous in describing
                 structured queries. This demonstration introduces
                 Sapphire, a system for querying RDF data that strikes a
                 middle ground between ambiguous keyword search and
                 difficult-to-use SPARQL. Our system does not replace
                 either, but utilizes both where they are most
                 effective. Sapphire helps the user construct expressive
                 SPARQL queries that represent her information needs
                 without requiring detailed knowledge about the queried
                 data sets. These queries are then executed over public
                 SPARQL endpoints from the LOD cloud. Sapphire guides
                 the user in the query writing process by showing
                 suggestions of query terms based on the queried data,
                 and by recommending changes to the query based on a
                 predictive user model.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amsterdamer:2016:DDT,
  author =       "Yael Amsterdamer and Tova Milo and Amit Somech and
                 Brit Youngmann",
  title =        "{December}: a declarative tool for crowd member
                 selection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1485--1488",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Adequate crowd selection is an important factor in the
                 success of crowdsourcing platforms, increasing the
                 quality and relevance of crowd answers and their
                 performance in different tasks. The optimal crowd
                 selection can greatly vary depending on properties of
                 the crowd and of the task. To this end, we present
                 December, a declarative platform with novel
                 capabilities for flexible crowd selection. December
                 supports the personalized selection of crowd members
                 via a dedicated query language Member-QL. This language
                 enables specifying and combining common crowd selection
                 criteria such as properties of a crowd member's profile
                 and history, similarity between profiles in specific
                 aspects and relevance of the member to a given task.
                 This holistic, customizable approach differs from
                 previous work that has mostly focused on dedicated
                 algorithms for crowd selection in specific settings. To
                 allow efficient query execution, we implement novel
                 algorithms in December based on our generic,
                 semantically-aware definitions of crowd member
                 similarity and expertise. We demonstrate the
                 effectiveness of December and Member-QL by using the
                 VLDB community as crowd members and allowing conference
                 participants to choose from among these members for
                 different purposes and in different contexts.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2016:DVV,
  author =       "Xi He and Nisarg Raval and Ashwin Machanavajjhala",
  title =        "A demonstration of {VisDPT}: visual exploration of
                 differentially private trajectories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1489--1492",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The release of detailed taxi trips has motivated
                 numerous useful studies, but has also triggered
                 multiple privacy attacks on individuals' trips. Despite
                 these attacks, no tools are available for
                 systematically analyzing the privacy risk of released
                 trajectory data. While, recent studies have proposed
                 mechanisms to publish synthetic mobility data with
                 provable privacy guarantees, the questions on --- (1)
                 how to explain the theoretical privacy guarantee to
                 non-privacy experts; and (2) how well private data
                 preserves the properties of ground truth, remain
                 unclear. To address these issues, we propose a system
                 --- VisDPT that provides rich visualization of
                 sensitive information in trajectory databases and helps
                 data curators understand the impact on utility due to
                 privacy preserving mechanisms. We believe VisDPT will
                 enable data curators to take informed decisions while
                 publishing sanitized data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Scheuer:2016:JSA,
  author =       "Tobias Scheuer and Norman May and Alexander B{\"o}hm
                 and Daniel Scheibli",
  title =        "{JexLog}: a sonar for the abyss",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1493--1496",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today's hardware architectures provide an
                 ever-increasing number of CPU cores that can be used
                 for running concurrent operations. A big challenge is
                 to ensure that these operations are properly
                 synchronized and make efficient use of the available
                 resources. Fellow database researchers have
                 appropriately described this problem as ``staring into
                 the abyss'' of complexity [12], where reasoning about
                 the interplay of jobs on a thousand cores becomes
                 extremely challenging. In this demonstration, we show
                 how a new tool, JexLog, can help to visually analyze
                 concurrent jobs in system software and how it is used
                 to optimize for modern hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ikeda:2016:CCC,
  author =       "Kosetsu Ikeda and Atsuyuki Morishima and Habibur
                 Rahman and Senjuti Basu Roy and Saravanan
                 Thirumuruganathan and Sihem Amer-Yahia and Gautam Das",
  title =        "Collaborative crowdsourcing with {Crowd4u}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1497--1500",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Collaborative crowdsourcing is an emerging paradigm
                 where a set of workers, often with diverse and
                 complementary skills, form groups and work together to
                 complete complex tasks. While crowdsourcing has been
                 used successfully in many applications, collaboration
                 is essential for achieving a high quality outcome for a
                 number of emerging applications such as text
                 translation, citizen journalism and surveillance tasks.
                 However, no crowdsourcing platform today enables the
                 end-to-end deployment of collaborative tasks. We
                 demonstrate Crowd4U, a volunteer-based system that
                 enables the deployment of diverse crowdsourcing tasks
                 with complex data-flows, in a declarative manner. In
                 addition to treating workers and tasks as rich
                 entities, Crowd4U also provides an easy-to-use
                 form-based task UI. Crowd4U implements worker-to-task
                 assignment algorithms that are appropriate for each
                 kind of task. Once workers are assigned to tasks,
                 appropriate worker collaboration schemes are enforced
                 in order to enable effective result coordination.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2016:YWQ,
  author =       "Lei Chen and Jianliang Xu and Christian S. Jensen and
                 Yafei Li",
  title =        "{YASK}: a why-not question answering engine for
                 spatial keyword query services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1501--1504",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the proliferation of the mobile use of the web,
                 spatial keyword query (SKQ) services are gaining in
                 importance. However, state-of-the-art SKQ systems do
                 not provide systematic functionality that allows users
                 to ask why some known object is unexpectedly missing
                 from a query result and do not provide an explanation
                 for such missing objects. In this demonstration, we
                 present a system called YASK, a whY-not question
                 Answering engine for Spatial Keyword query services,
                 that is capable of answering why-not questions posed in
                 response to answers to spatial keyword top-$k$ queries.
                 Two explanation and query refinement models, namely
                 preference adjustment and keyword adaptation, are
                 implemented in YASK. The system provides users not only
                 with the reasons why desired objects are missing from
                 query results, but provides also relevant refined
                 queries that revive the expected but missing objects.
                 This demonstration gives attendees hands-on experience
                 with YASK through a map-based GUI interface in which
                 attendees can issue spatial keyword queries, pose
                 why-not questions, and visualize the results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yi:2016:AVQ,
  author =       "Peipei Yi and Byron Choi and Sourav S. Bhowmick and
                 Jianliang Xu",
  title =        "{AutoG}: a visual query autocompletion framework for
                 graph databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1505--1508",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Composing queries is evidently a tedious task. This is
                 particularly true of graph queries as they are
                 typically complex and prone to errors, compounded by
                 the fact that graph schemas can be missing or too loose
                 to be helpful for query formulation. Despite the great
                 success of query formulation aids, in particular,
                 automatic query completion, graph query autocompletion
                 has received much less research attention. In this
                 demonstration, we present a novel interactive visual
                 subgraph query autocompletion framework called AutoG
                 which alleviates the potentially painstaking task of
                 graph query formulation. Specifically, given a large
                 collection of small or medium-sized graphs and a visual
                 query fragment q formulated by a user, AutoG returns
                 top-$k$ query suggestions $ Q'$ as output at
                 interactive time. Users may choose a query from $ Q'$
                 and iteratively apply AutoG to compose their queries.
                 We demonstrate various features of AutoG and its
                 superior ability to generate high quality suggestions
                 to aid visual subgraph query formulation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Miao:2016:SPR,
  author =       "Xiaoye Miao and Yunjun Gao and Gang Chen and Huiyong
                 Cui and Chong Guo and Weida Pan",
  title =        "{Si$^2$p}: a restaurant recommendation system using
                 preference queries over incomplete information",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1509--1512",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The incomplete data is universal in many real-life
                 applications due to data integration, the limitation of
                 devices, etc. In this demonstration, we present
                 Si$^2$p, a restaurant recommendation System with
                 Preference queries on Incomplete Information. Si$^2$p
                 is capable of friendly recommending desirable
                 restaurants based on preference queries that take the
                 incomplete ratings information into consideration. It
                 adopts the browser-server model, and incorporates three
                 functionality modules including friendly and convenient
                 query submission, flexible and useful result
                 explanation, timely and incremental dataset
                 interaction. Si$^2$p provides the server side based on
                 an extended PostgreSQL database that integrates two
                 types of preference queries, namely, skyline and
                 top-$k$ dominating queries over incomplete data. It
                 also offers the browser-based interface for the users
                 to interact with the system. Using a real restaurant
                 dataset from TripAdvisor, we demonstrate Si$^2$p can
                 recommend and explore the restaurants in a friendly
                 way.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bonaque:2016:MIQ,
  author =       "R. Bonaque and T. D. Cao and B. Cautis and F.
                 Goasdou{\'e} and J. Letelier and I. Manolescu and O.
                 Mendoza and S. Ribeiro and X. Tannier",
  title =        "Mixed-instance querying: a lightweight integration
                 architecture for data journalism",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1513--1516",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As the world's affairs get increasingly more digital,
                 timely production and consumption of news require to
                 efficiently and quickly exploit heterogeneous data
                 sources. Discussions with journalists revealed that
                 content management tools currently at their disposal
                 fall very short of expectations. We demonstrate
                 Tatooine, a lightweight data integration prototype,
                 which allows to quickly set up integration queries
                 across (very) heterogeneous data sources, capitalizing
                 on the many data links (joins) available in this
                 application domain. Our demonstration is based on
                 scenarios we study in collaboration with Le Monde,
                 France's major newspaper.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Butterstein:2016:PPS,
  author =       "Dennis Butterstein and Torsten Grust",
  title =        "Precision performance surgery for {PostgreSQL}:
                 {LLVM}-based Expression Compilation, Just in Time",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1517--1520",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate how the compilation of SQL expressions
                 into machine code leads to significant query runtime
                 improvements in PostgreSQL 9. Our primary goal is to
                 connect recent research in query code generation with
                 one of the most widely deployed database engines. The
                 approach calls on LLVM to translate arithmetic and
                 filter expressions into native x86 instructions just
                 before SQL query execution begins. We deliberately
                 follow a non-invasive design that does not turn
                 PostgreSQL on its head: interpreted and compiled
                 expression evaluation coexist and both are used to
                 execute the same query. We will bring an enhanced
                 version of PostgreSQL that exhibits notable runtime
                 savings and provides visual insight into exactly where
                 and how execution plans can benefit from SQL expression
                 compilation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yahya:2016:EQE,
  author =       "Mohamed Yahya and Klaus Berberich and Maya Ramanath
                 and Gerhard Weikum",
  title =        "Exploratory querying of extended knowledge graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1521--1524",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Knowledge graphs (KGs) are important assets for
                 search, analytics, and recommendations. However,
                 querying a KG to explore entities and discover facts is
                 difficult and tedious, even for users with skills in
                 SPARQL. First, users are not familiar with the
                 structure and labels of entities, classes and
                 relations. Second, KGs are bound to be incomplete, as
                 they capture only major facts about entities and their
                 relationships and miss out on many of the more subtle
                 aspects. We demonstrate TriniT, a system that
                 facilitates exploratory querying of large KGs, by
                 addressing these issues of ``vocabulary'' mismatch and
                 KG incompleteness. TriniT supports query relaxation
                 rules that are invoked to allow for relevant answers
                 which are not found otherwise. The incompleteness issue
                 is addressed by extending a KG with additional
                 text-style token triples obtained by running Open IE on
                 Web and text sources. The query language, relaxation
                 methods, and answer ranking are extended appropriately.
                 The demo shows automatic query relaxation and has
                 support for interactively adding user-customized
                 relaxations. In both situations, the demo provides
                 answer explanations and offers additional query
                 suggestions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Panev:2016:EDR,
  author =       "Kiril Panev and Sebastian Michel and Evica Milchevski
                 and Koninika Pal",
  title =        "Exploring databases via reverse engineering ranking
                 queries with {PALEO}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1525--1528",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A novel approach to explore databases using ranked
                 lists is demonstrated. Working with ranked lists,
                 capturing the relative performance of entities, is a
                 very intuitive and widely applicable concept. Users can
                 post lists of entities for which explanatory SQL
                 queries and full result lists are returned. By refining
                 the input, the results, or the queries, user can
                 interactively explore the database content. The
                 demonstrated system is centered around our PALEO
                 framework for reverse engineering OLAP-style database
                 queries and novel work on mining interesting
                 categorical attributes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bespinyowong:2016:EER,
  author =       "Ramon Bespinyowong and Wei Chen and H. V. Jagadish and
                 Yuxin Ma",
  title =        "{ExRank}: an exploratory ranking interface",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1529--1532",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Even with simple everyday tasks like online shopping
                 or choosing a restaurant, users are easily overwhelmed
                 with the large number of choices available today, each
                 with a large number of inter-related attributes. We
                 present ExRank, an interactive interface for exploring
                 data that helps users understand the relationship
                 between attribute values and find interesting items in
                 the dataset. Based on a kNN graph and a PageRank
                 algorithm, ExRank suggests which attributes the user
                 should look at, and how expressed choices in particular
                 attributes affect the distribution of values in other
                 attributes for candidate objects. It solves the problem
                 of empty result by showing similar items and when there
                 are too many results, it ranks the data for the user.
                 This demo consists of (1) the description of the
                 software architecture and the user interface (2) the
                 logic and reason behind our solution and (3) a list of
                 demonstration scenarios for showing to the audience.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Diaz:2016:SQR,
  author =       "Gonzalo Diaz and Marcelo Arenas and Michael Benedikt",
  title =        "{SPARQLByE}: querying {RDF} data by example",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1533--1536",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Semantic Web technologies such as RDF and its query
                 language, SPARQL, offer the possibility of opening up
                 the use of public datasets to a great variety of
                 ordinary users. But a key obstacle to the use of open
                 data is the unfamiliarity of users with the structure
                 of data or with SPARQL. To deal with these issues, we
                 introduce a system for querying RDF data by example. At
                 its core is a technique for reverse-engineering SPARQL
                 queries by example. We demonstrate how reverse
                 engineering along with other techniques, such as query
                 relaxation, enables our system, SPARQLByE, to guide
                 users who are unfamiliar with both the dataset and with
                 SPARQL to the desired query and result set.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2016:NNL,
  author =       "Daniel Deutch and Nave Frost and Amir Gilad",
  title =        "{NLProv}: natural language provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1537--1540",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose to present NLProv: an end-to-end Natural
                 Language (NL) interface for database queries. Previous
                 work has focused on interfaces for specifying NL
                 questions, which are then compiled into queries in a
                 formal language (e.g. SQL). We build upon this work,
                 but focus on presenting a detailed form of the answers
                 in Natural Language. The answers that we present are
                 importantly based on the provenance of tuples in the
                 query result, detailing not only which are the results
                 but also their explanations. We develop a novel method
                 for transforming provenance information to NL, by
                 leveraging the original NL question structure.
                 Furthermore, since provenance information is typically
                 large, we present two solutions for its effective
                 presentation as NL text: one that is based on
                 provenance factorization with novel desiderata relevant
                 to the NL case, and one that is based on
                 summarization.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandra:2016:PMA,
  author =       "Bikash Chandra and Mathew Joseph and Bharath
                 Radhakrishnan and Shreevidhya Acharya and S.
                 Sudarshan",
  title =        "Partial marking for automated grading of {SQL}
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1541--1544",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The XData system, currently being developed at IIT
                 Bombay, provides an automated and interactive platform
                 for grading student SQL queries, as well as for
                 learning SQL. Prior work on the XData system focused on
                 generating query specific test cases to catch common
                 errors in queries. These test cases are used to check
                 whether the student queries are correct or not. For
                 grading student assignments, it is usually not
                 sufficient to just check if a query is correct: if the
                 query is incorrect, partial marks may need to be given,
                 depending on how close the query is to being correct.
                 In this paper, we extend the XData system by adding
                 features that enable awarding of partial marks to
                 incorrect student queries. Our system is able to go
                 beyond numerous syntactic features when comparing a
                 student query with a correct query. These features of
                 our grading system allow the grading of SQL queries to
                 be fully automated, and scalable to even large class
                 sizes such as those of MOOCs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhao:2016:TPM,
  author =       "Kaiqi Zhao and Yiding Liu and Quan Yuan and Lisi Chen
                 and Zhida Chen and Gao Cong",
  title =        "Towards personalized maps: mining user preferences
                 from geo-textual data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1545--1548",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Rich geo-textual data is available online and the data
                 keeps increasing at a high speed. We propose two user
                 behavior models to learn several types of user
                 preferences from geo-textual data, and a prototype
                 system on top of the user preference models for mining
                 and search geo-textual data (called PreMiner) to
                 support personalized maps. Different from existing
                 recommender systems and data analysis systems, PreMiner
                 highly personalizes user experience on maps and
                 supports several applications, including user mobility
                 \& interests mining, opinion mining in regions, user
                 recommendation, point-of-interest recommendation, and
                 querying and subscribing on geo-textual data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Feng:2016:SRS,
  author =       "Kaiyu Feng and Kaiqi Zhao and Yiding Liu and Gao
                 Cong",
  title =        "A system for region search and exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1549--1552",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increasing popularity of mobile devices and
                 location based services, massive amount of geo-textual
                 data (e.g., geo-tagged tweets) is being generated
                 everyday. Compared with traditional spatial data, the
                 textual dimension of geo-textual data greatly enriches
                 the data. Meanwhile, the spatial dimension of
                 geo-textual data also adds a semantically rich new
                 aspect to textual data. The large volume, together with
                 its rich semantics, calls for the need for data
                 exploration. First, it has many applications to
                 retrieve a region for exploration that satisfies
                 user-specified conditions (e.g., the size and shape of
                 the region) while maximizing some other conditions
                 (e.g., the relevance to the query keywords of the
                 objects in the region). Second, it is useful to mine
                 and explore the topics of the geo-textual data within a
                 (specified or retrieved) region and perhaps a timespan.
                 This demonstration proposal presents the main ideas of
                 our system, the Region Search and Exploration System
                 (RISE), for efficiently supporting region search and
                 exploration, and our demonstration plan.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Vitorovic:2016:SSR,
  author =       "Aleksandar Vitorovic and Mohammed Elseidy and Khayyam
                 Guliyev and Khue Vu Minh and Daniel Espino and Mohammad
                 Dashti and Yannis Klonatos and Christoph Koch",
  title =        "{Squall}: scalable real-time analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1553--1556",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Squall is a scalable online query engine that runs
                 complex analytics in a cluster using skew-resilient,
                 adaptive operators. Squall builds on state-of-the-art
                 partitioning schemes and local algorithms, including
                 some of our own. This paper presents the overview of
                 Squall, including some novel join operators. The paper
                 also presents lessons learned over the five years of
                 working on this system, and outlines the plan for the
                 proposed system demonstration.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khurana:2016:GBE,
  author =       "Udayan Khurana and Srinivasan Parthasarathy and Deepak
                 Turaga",
  title =        "Graph-based exploration of non-graph datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1557--1560",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphs or networks provide a powerful abstraction to
                 view and analyze relationships among different entities
                 present in a dataset. However, much of the data of
                 interest to analysts and data scientists resides in
                 non-graph forms such as relational databases, JSON,
                 XML, CSV and text. The effort and skill required in
                 identifying and extracting the relevant graph
                 representation from data is often prohibitive and
                 limits a wider adoption of graph-based analysis of
                 non-graph data. In this paper, we demonstrate our
                 system called GraphViewer, for accelerated graph-based
                 exploration and analysis. It automatically discovers
                 relevant graphs implicit within a given non-graph
                 dataset using a set of novel rule-based and data-driven
                 techniques, and optimizes their extraction and storage.
                 It computes several node and graph level metrics and
                 detects anomalous entities in data. Finally, it
                 summarizes the results to support interpretation by a
                 human analyst. While the system automates the
                 computationally intensive aspects of the process, it is
                 engineered to leverage human domain expertise and
                 instincts to fine tune the data exploration process.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2016:RDF,
  author =       "Minjian Liu and Qing Wang",
  title =        "{Rogas}: a declarative framework for network
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1561--1564",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Network analytics has become increasingly popular in
                 recent years. Various graph systems have been developed
                 for analysing networks, while network data is still
                 largely stored and managed in relational database
                 systems in the first place. As two separate systems are
                 often used to manage and analyse network data, it not
                 only increases the difficulty for users to learn and
                 maintain these different systems simultaneously, but
                 also impedes performing more sophisticated analysis on
                 relational and topological properties of network data.
                 Aiming to tackle these issues, we present Rogas in this
                 paper, which is a declarative framework that allows the
                 user to formulate analysis queries naturally without
                 thinking about the tedious implementation details of
                 graph algorithms and query processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tang:2016:LDM,
  author =       "Mingjie Tang and Yongyang Yu and Qutaibah M. Malluhi
                 and Mourad Ouzzani and Walid G. Aref",
  title =        "{LocationSpark}: a distributed in-memory data
                 management system for big spatial data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1565--1568",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present LocationSpark, a spatial data processing
                 system built on top of Apache Spark, a widely used
                 distributed data processing system. LocationSpark
                 offers a rich set of spatial query operators, e.g.,
                 range search, kNN, spatio-textual operation,
                 spatial-join, and kNN-join. To achieve high
                 performance, LocationSpark employs various spatial
                 indexes for in-memory data, and guarantees that
                 immutable spatial indexes have low overhead with fault
                 tolerance. In addition, we build two new layers over
                 Spark, namely a query scheduler and a query executor.
                 The query scheduler is responsible for mitigating skew
                 in spatial queries, while the query executor selects
                 the best plan based on the indexes and the nature of
                 the spatial queries. Furthermore, to avoid unnecessary
                 network communication overhead when processing
                 overlapped spatial data, we embed an efficient spatial
                 Bloom filter into LocationSpark's indexes. Finally,
                 LocationSpark tracks frequently accessed spatial data,
                 and dynamically flushes less frequently accessed data
                 into disk. We evaluate our system on real workloads and
                 demonstrate that it achieves an order of magnitude
                 performance gain over a baseline framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shanbhag:2016:ASC,
  author =       "Anil Shanbhag and Alekh Jindal and Yi Lu and Samuel
                 Madden",
  title =        "{Amoeba}: a shape changing storage system for big
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1569--1572",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data partitioning significantly improves the query
                 performance in distributed database systems. A large
                 number of techniques have been proposed to efficiently
                 partition a dataset for a given query workload.
                 However, many modern analytic applications involve
                 ad-hoc or exploratory analysis where users do not have
                 a representative query workload upfront. Furthermore,
                 workloads change over time as businesses evolve or as
                 analysts gain better understanding of their data.
                 Static workload-based data partitioning techniques are
                 therefore not suitable for such settings. In this
                 paper, we describe the demonstration of Amoeba, a
                 distributed storage system which uses adaptive
                 multi-attribute data partitioning to efficiently
                 support ad-hoc as well as recurring queries. Amoeba
                 applies a robust partitioning algorithm such that
                 ad-hoc queries on all attributes have similar
                 performance gains. Thereafter, Amoeba adaptively
                 repartitions the data based on the observed query
                 sequence, i.e., the system improves over time. All
                 along Amoeba offers both adaptivity (i.e., adjustments
                 according to workload changes) as well as robustness
                 (i.e., avoiding performance spikes due to workload
                 changes). We propose to demonstrate Amoeba on scenarios
                 from an internet-of-things startup that tracks user
                 driving patterns. We invite the audience to
                 interactively fire fast ad-hoc queries, observe
                 multi-dimensional adaptivity, and play with a
                 robust/reactive knob in Amoeba. The web front end
                 displays the layout changes, runtime costs, and
                 compares it to Spark with both default and
                 workload-aware partitioning.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Olteanu:2016:FRM,
  author =       "Dan Olteanu and Maximilian Schleich",
  title =        "{F}: regression models over factorized views",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1573--1576",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate F, a system for building regression
                 models over database views. At its core lies the
                 observation that the computation and representation of
                 materialized views, and in particular of joins, entail
                 non-trivial redundancy that is not necessary for the
                 efficient computation of aggregates used for building
                 regression models. F avoids this redundancy by
                 factorizing data and computation and can outperform the
                 state-of-the-art systems MADlib, R, and Python
                 StatsModels by orders of magnitude on real-world
                 datasets. We illustrate how to incrementally build
                 regression models over factorized views using both an
                 in-memory implementation of F and its SQL encoding. We
                 also showcase the effective use of F for model
                 selection: F decouples the data-dependent computation
                 step from the data-independent convergence of model
                 parameters and only performs once the former to explore
                 the entire model space.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rodriguez:2016:SMP,
  author =       "Miguel Rodr{\'\i}guez and Sean Goldberg and Daisy Zhe
                 Wang",
  title =        "{SigmaKB}: multiple probabilistic knowledge base
                 fusion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1577--1580",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The interest in integrating web-scale knowledge bases
                 (KBs) has intensified in the last several years.
                 Research has focused on knowledge base completion
                 between two KBs with complementary information, lacking
                 any notion of uncertainty or method of handling
                 conflicting information. We present SigmaKB, a
                 knowledge base system that utilizes Consensus
                 Maximization Fusion and user feedback to integrate and
                 improve the query results of a total of 71 KBs. This
                 paper presents the architecture and demonstration
                 details.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Konda:2016:MTBb,
  author =       "Pradap Konda and Sanjib Das and Paul Suganthan G. C.
                 and AnHai Doan and Adel Ardalan and Jeffrey R. Ballard
                 and Han Li and Fatemah Panahi and Haojun Zhang and Jeff
                 Naughton and Shishir Prasad and Ganesh Krishnan and
                 Rohit Deep and Vijay Raghavendra",
  title =        "{Magellan}: toward building entity matching management
                 systems over data science stacks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1581--1584",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity matching (EM) has been a long-standing
                 challenge in data management. Most current EM works,
                 however, focus only on developing matching algorithms.
                 We argue that far more efforts should be devoted to
                 building EM systems. We discuss the limitations of
                 current EM systems, then present Magellan, a new kind
                 of EM systems that addresses these limitations.
                 Magellan is novel in four important aspects. (1) It
                 provides a how-to guide that tells users what to do in
                 each EM scenario, step by step. (2) It provides tools
                 to help users do these steps; the tools seek to cover
                 the entire EM pipeline, not just matching and blocking
                 as current EM systems do. (3) Tools are built on top of
                 the data science stacks in Python, allowing Magellan to
                 borrow a rich set of capabilities in data cleaning, IE,
                  visualization, learning, etc. (4) Magellan provides a
                 powerful scripting environment to facilitate
                 interactive experimentation and allow users to quickly
                 write code to ``patch'' the system. We have extensively
                 evaluated Magellan with 44 students and users at
                 various organizations. In this paper we propose
                 demonstration scenarios that show the promise of the
                 Magellan approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alkowaileet:2016:LSC,
  author =       "Wail Y. Alkowaileet and Sattam Alsubaiee and Michael
                 J. Carey and Till Westmann and Yingyi Bu",
  title =        "Large-scale complex analytics on semi-structured
                 datasets using {AsterixDB} and {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1585--1588",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large quantities of raw data are being generated by
                 many different sources in different formats. Private
                 and public sectors alike acclaim the valuable
                 information and insights that can be mined from such
                 data to better understand the dynamics of everyday
                 life, such as traffic, worldwide logistics, and social
                 behavior. For this reason, storing, managing, and
                 analyzing ``Big Data'' at scale is getting a tremendous
                 amount of attention, both in academia and industry. In
                 this paper, we demonstrate the power of a parallel
                 connection that we have built between Apache Spark and
                 Apache AsterixDB (Incubating) to enable complex
                 analytics such as machine learning and graph analysis
                 on data drawn from large semi-structured data
                 collections. The integration of these two systems
                 allows researchers and data scientists to leverage
                 AsterixDB capabilities, including fast ingestion and
                 indexing of semi-structured data and efficient
                 answering of geo-spatial and fuzzy text queries.
                 Complex data analytics can then be performed on the
                 resulting AsterixDB query output in order to obtain
                 additional insights by leveraging the power of Spark's
                 machine learning and graph libraries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Picado:2016:SIS,
  author =       "Jose Picado and Parisa Ataei and Arash Termehchy and
                 Alan Fern",
  title =        "Schema independent and scalable relational learning by
                 {Castor}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1589--1592",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Learning novel relations from relational databases is
                 an important problem with many applications in database
                 systems and machine learning. Relational learning
                 algorithms leverage the properties of the database
                 schema to find the definition of the target relation in
                 terms of the existing relations in the database.
                 However, the same data set may be represented under
                 different schemas for various reasons, such as
                 efficiency and data quality. Unfortunately, current
                 relational learning algorithms tend to vary quite
                 substantially over the choice of schema, which
                 complicates their off-the-shelf application. We
                 demonstrate Castor, a relational learning system that
                 efficiently learns the same definitions over common
                 schema variations. The results of Castor are more
                 accurate than well-known learning systems over large
                 data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kannapalli:2016:AWA,
  author =       "Rajeshkumar Kannapalli and Azade Nazi and Mahashweta
                 Das and Gautam Das",
  title =        "{AD-WIRE}: add-on for {Web} item reviewing system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1593--1596",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Over the past few decades as purchasing options moved
                 online, the widespread use and popularity of online
                 review sites has simultaneously increased. In spite of
                 the fact that a huge extent of buying choices today are
                 driven by numeric scores (e.g., rating a product),
                 detailed reviews play an important role for activities
                 like purchasing an expensive DSLR camera. Since writing
                 a detailed review for an item is usually
                 time-consuming, the number of reviews available in the
                 Web is far from many. In this paper, we build a system
                 AD-WIRE that given a user and an item, our system
                  identifies the top-k meaningful tags to help her
                 review the item easily. AD-WIRE allows a user to
                 compose her review by quickly selecting from among the
                 set of returned tags or writes her own review. AD-WIRE
                 also visualizes the dependency of the tags to different
                 aspects of an item so a user can make an informed
                 decision quickly. The system can be used for different
                 type of the products. The current demonstration is
                 built to explore review writing process for the mobile
                 phones.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chaoji:2016:MLR,
  author =       "Vineet Chaoji and Rajeev Rastogi and Gourav Roy",
  title =        "Machine learning in the real world",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1597--1600",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine Learning (ML) has become a mature technology
                 that is being applied to a wide range of business
                 problems such as web search, online advertising,
                 product recommendations, object recognition, and so on.
                 As a result, it has become imperative for researchers
                 and practitioners to have a fundamental understanding
                 of ML concepts and practical knowledge of end-to-end
                 modeling. This tutorial takes a hands-on approach to
                 introducing the audience to machine learning. The first
                 part of the tutorial gives a broad overview and
                 discusses some of the key concepts within machine
                 learning. The second part of the tutorial takes the
                 audience through the end-to-end modeling pipeline for a
                 real-world income prediction problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bohm:2016:OAD,
  author =       "Alexander B{\"o}hm and Jens Dittrich and Niloy
                 Mukherjee and Ippokratis Pandis and Rajkumar Sen",
  title =        "Operational analytics data management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1601--1604",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Prior to mid-2000s, the space of data analytics was
                 mainly confined within the area of decision support
                 systems. It was a long era of isolated enterprise data
                  warehouses curating information from live data sources
                 and of business intelligence software used to query
                 such information. Most data sets were small enough in
                 volume and static enough in velocity to be segregated
                 in warehouses for analysis. Data analysis was not
                 ad-hoc; it required pre-requisite knowledge of
                 underlying data access patterns for the creation of
                 specialized access methods (e.g. covering indexes,
                 materialized views) in order to efficiently execute a
                 set of few focused queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chu:2016:QDC,
  author =       "Xu Chu and Ihab F. Ilyas",
  title =        "Qualitative data cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1605--1608",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data quality is one of the most important problems in
                 data management, since dirty data often leads to
                 inaccurate data analytics results and wrong business
                  decisions. Data cleaning exercise often consists of two
                 phases: error detection and error repairing. Error
                 detection techniques can either be quantitative or
                 qualitative; and error repairing is performed by
                 applying data transformation scripts or by involving
                 human experts, and sometimes both. In this tutorial, we
                 discuss the main facets and directions in designing
                 qualitative data cleaning techniques. We present a
                 taxonomy of current qualitative error detection
                 techniques, as well as a taxonomy of current data
                 repairing techniques. We will also discuss proposals
                 for tackling the challenges for cleaning ``big data''
                 in terms of scale and distribution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Larson:2016:MMM,
  author =       "Per-{\AA}ke Larson and Justin Levandoski",
  title =        "Modern main-memory database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1609--1610",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This tutorial provides an overview of recent
                 developments in main-memory database systems. With
                 growing memory sizes and memory prices dropping by a
                 factor of 10 every 5 years, data having a ``primary
                 home'' in memory is now a reality. Main-memory
                 databases eschew many of the traditional architectural
                 tenets of relational database systems that optimized
                 for disk-resident data. Innovative approaches to
                 fundamental issues such as concurrency control and
                 query processing are required to unleash the full
                 performance potential of main-memory databases. The
                 tutorial is focused around design issues and
                 architectural choices that must be made when building a
                 high performance database system optimized for
                 main-memory: data storage and indexing, concurrency
                 control, durability and recovery techniques, query
                 processing and compilation, support for high
                 availability, and ability to support hybrid
                 transactional and analytics workloads. This will be
                 illustrated by example solutions drawn from four
                 state-of-the-art systems: H-Store/VoltDB, Hekaton,
                  HyPer, and SAP HANA. The tutorial will also cover
                 current and future research trends.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Machanavajjhala:2016:DPW,
  author =       "Ashwin Machanavajjhala and Xi He and Michael Hay",
  title =        "Differential privacy in the wild: a tutorial on
                 current practices \& open challenges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1611--1614",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differential privacy has emerged as an important
                 standard for privacy preserving computation over
                 databases containing sensitive information about
                 individuals. Research on differential privacy spanning
                 a number of research areas, including theory, security,
                 database, networks, machine learning, and statistics,
                 over the last decade has resulted in a variety of
                 privacy preserving algorithms for a number of analysis
                 tasks. Despite maturing research efforts, the adoption
                 of differential privacy by practitioners in industry,
                 academia, or government agencies has so far been rare.
                 Hence, in this tutorial, we will first describe the
                 foundations of differentially private algorithm design
                 that cover the state of the art in private computation
                 on tabular data. In the second half of the tutorial we
                 will highlight real world applications on complex data
                 types, and identify research challenges in applying
                 differential privacy to real world applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amer-Yahia:2016:HFC,
  author =       "Sihem Amer-Yahia and Senjuti Basu Roy",
  title =        "Human factors in crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1615--1618",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today, crowdsourcing is used to ``taskify'' any job
                 ranging from simple receipt transcription to
                 collaborative editing, fan-subbing, citizen science,
                 and citizen journalism. The crowd is typically
                 volatile, its arrival and departure asynchronous, and
                 its levels of attention and accuracy diverse. Tasks
                 vary in complexity and may necessitate the
                 participation of workers with varying degrees of
                 expertise. Sometimes, workers need to collaborate
                 explicitly and build on each other's contributions to
                 complete a single task. For example, in disaster
                  reporting, CrowdMap allows geographically close people
                 with diverse and complementary skills, to work together
                 to report details about the course of a typhoon or the
                 aftermath of an earthquake. This uber-ization of human
                 labor requires the understanding of workers motivation
                 in completing a task, their ability to work together in
                 collaborative tasks, as well as, helping workers find
                 relevant tasks. For over 40 years, organization studies
                 have thoroughly examined human factors that affect
                 workers in physical workplaces. More recently, computer
                 scientists have developed algorithms that verify and
                 leverage those findings in a virtual marketplace, in
                 this case, a crowdsourcing platform. The goal of this
                 tutorial is to review those two areas and discuss how
                 their combination may improve workers' experience, task
                 throughput and outcome quality for both micro-tasks and
                 collaborative tasks. We will start with a coverage of
                 motivation theory, team formation, and learning worker
                 profiles. We will then address open research questions
                 that result from this review.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Stoica:2016:TCB,
  author =       "Ion Stoica",
  title =        "Trends and challenges in big data processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1619--1619",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Almost six years ago we started the Spark project at
                 UC Berkeley. Spark is a cluster computing engine that
                 is optimized for in-memory processing, and unifies
                 support for a variety of workloads, including batch,
                 interactive querying, streaming, and iterative
                 computations. Spark is now the most active big data
                 project in the open source community, and is already
                 being used by over one thousand organizations. One of
                 the reasons behind Spark's success has been our early
                 bet on the continuous increase in the memory capacity
                 and the feasibility to fit many realistic workloads in
                 the aggregate memory of typical production clusters.
                 Today, we are witnessing new trends, such as Moore's
                 law slowing down, and the emergence of a variety of
                 computation and storage technologies, such as GPUs,
                  FPGAs, and 3D XPoint. In this talk, I'll discuss some
                 of the lessons we learned in developing Spark as a
                 unified computation platform, and the implications of
                 today's hardware and software trends on the development
                 of the next generation of big data processing
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rajaraman:2016:DDD,
  author =       "Anand Rajaraman",
  title =        "Data-driven disruption: the view from {Silicon
                 Valley}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1620--1620",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We live in an era where software is transforming
                 industries, the sciences, and society as a whole. This
                 exciting phenomenon has been described by the phrase
                 ``software is eating the world.'' It is becoming
                 increasingly apparent that data is the fuel powering
                 software's conquests. Data is the new disruptor. It's
                 hard to believe that the first decade of the Big Data
                 era is already behind us. Silicon Valley has been at
                 the forefront of developing and applying data-driven
                 approaches to create disruption at many levels:
                 infrastructure (e.g., Hadoop and Spark), capabilities
                 (e.g., image recognition and machine translation), and
                 killer apps (e.g., self-driving cars and messaging
                 bots). In this talk, we first look back on the past
                 decade and share learnings from the frontlines of
                 data-driven disruption. Looking ahead, we then describe
                 challenges and opportunities for the next decade. Since
                 this has also been a personal journey, we will use
                 examples drawn from personal experience to illustrate
                 each point.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dong:2016:LNV,
  author =       "Xin Luna Dong",
  title =        "Leave no valuable data behind: the crazy ideas and the
                 business",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1621--1621",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the mission ``leave no valuable data behind'', we
                 developed techniques for knowledge fusion to guarantee
                 the correctness of the knowledge. This talk starts with
                 describing a few crazy ideas we have tested. The first,
                 known as ``Knowledge Vault'', used 15 extractors to
                 automatically extract knowledge from 1B+ Webpages,
                 obtaining 3B+ distinct (subject, predicate, object)
                 knowledge triples and predicting well-calibrated
                 probabilities for extracted triples. The second, known
                 as ``Knowledge-Based Trust'', estimated the
                 trustworthiness of 119M webpages and 5.6M websites
                 based on the correctness of their factual information.
                 We then present how we bring the ideas to business in
                 filling the gap between the knowledge at Google
                 Knowledge Graph and the knowledge in the world.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mokbel:2016:LDM,
  author =       "Mohamed Mokbel and Chi-Yin Chow and Walid Aref",
  title =        "Location data management: a tale of two systems and
                 the ``next destination''!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "13",
  pages =        "1622--1622",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:19:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In early 2000, we had the vision of ubiquitous
                 location services, where each object is aware of its
                 location, and continuously sends its location to a
                 designated database server. This flood of location data
                 opened the door for a myriad of location-based services
                 that were considered visionary at that time, yet today
                 they are a reality and have become ubiquitous. To
                 realize our early vision, we identified two main
                 challenges that needed to be addressed, namely,
                 scalability and privacy. We have addressed these
                 challenges through two main systems, PLACE and Casper.
                 PLACE, developed at Purdue University from 2000 to
                 2005, set up the environment for built-in database
                 support of scalable and continuous location-based
                 services. The Casper system, developed at University of
                 Minnesota from 2005 to 2010, was built inside the PLACE
                 server allowing it to provide its high quality scalable
                 service, while maintaining the privacy of its users'
                 locations. This talk will take you through a time
                 journey of location services from 2000 until today, and
                 beyond, highlighting the development efforts of the
                 PLACE and Casper systems, along with their impact on
                 current and future research initiatives in both
                 academia and industry.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2016:QET,
  author =       "Badrish Chandramouli and Raul Castro Fernandez and
                 Jonathan Goldstein and Ahmed Eldawy and Abdul Quamar",
  title =        "{Quill}: efficient, transferable, and rich analytics
                 at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1623--1634",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper introduces Quill (stands for a quadrillion
                 tuples per day), a library and distributed platform for
                 relational and temporal analytics over large datasets
                 in the cloud. Quill exposes a new abstraction for
                 parallel datasets and computation, called
                 ShardedStreamable. This abstraction provides the
                 ability to express efficient distributed physical query
                 plans that are transferable, i.e., movable from offline
                 to real-time and vice versa. ShardedStreamable
                 decouples incremental query logic specification, a
                 small but rich set of data movement operations, and
                 keying; this allows Quill to express a broad space of
                 plans with complex querying functionality, while
                 leveraging existing temporal libraries such as Trill.
                 Quill's layered architecture provides a careful
                 separation of responsibilities with independently
                 useful components, while retaining high performance. We
                 built Quill for the cloud, with a master-less design
                 where a language-integrated client library directly
                 communicates and coordinates with cloud workers using
                 off-the-shelf distributed cloud components such as
                 queues. Experiments on up to 400 cloud machines, and on
                 datasets up to 1TB, find Quill to incur low overheads
                 and outperform SparkSQL by up to orders-of-magnitude
                 for temporal and 6$ \times $ for relational queries,
                 while supporting a rich space of transferable,
                 programmable, and expressive distributed physical query
                 plans.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Walenz:2016:PAD,
  author =       "Brett Walenz and Jun Yang",
  title =        "Perturbation analysis of database queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1635--1646",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a system, Perada, for parallel perturbation
                 analysis of database queries. Perturbation analysis
                 considers the results of a query evaluated with (a
                 typically large number of) different parameter
                 settings, to help discover leads and evaluate claims
                 from data. Perada simplifies the development of
                 general, ad hoc perturbation analysis by providing a
                 flexible API to support a variety of optimizations such
                 as grouping, memoization, and pruning; by automatically
                 optimizing performance through run-time observation,
                 learning, and adaptation; and by hiding the complexity
                 of concurrency and failures from its developers. We
                 demonstrate Perada's efficacy and efficiency with real
                 workloads applying perturbation analysis to
                 computational journalism.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2016:HBG,
  author =       "Jing Li and Hung-Wei Tseng and Chunbin Lin and Yannis
                 Papakonstantinou and Steven Swanson",
  title =        "{HippogriffDB}: balancing {I/O} and {GPU} bandwidth in
                 big data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1647--1658",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As data sets grow and conventional processor
                 performance scaling slows, data analytics move towards
                 heterogeneous architectures that incorporate hardware
                 accelerators (notably GPUs) to continue scaling
                 performance. However, existing GPU-based databases fail
                 to deal with big data applications efficiently: their
                 execution model suffers from scalability limitations on
                 GPUs whose memory capacity is limited; existing systems
                 fail to consider the discrepancy between fast GPUs and
                 slow storage, which can counteract the benefit of GPU
                 accelerators. In this paper, we propose HippogriffDB,
                 an efficient, scalable GPU-accelerated OLAP system. It
                 tackles the bandwidth discrepancy using compression and
                 an optimized data transfer path. HippogriffDB stores
                 tables in a compressed format and uses the GPU for
                 decompression, trading GPU cycles for the improved I/O
                 bandwidth. To improve the data transfer efficiency,
                 HippogriffDB introduces a peer-to-peer, multi-threaded
                 data transfer mechanism, directly transferring data
                 from the SSD to the GPU. HippogriffDB adopts a
                 query-over-block execution model that provides
                 scalability using a stream-based approach. The model
                 improves kernel efficiency with the operator fusion and
                 double buffering mechanism. We have implemented
                 HippogriffDB using an NVMe SSD, which talks directly to
                 a commercial GPU. Results on two popular benchmarks
                 demonstrate its scalability and efficiency.
                 HippogriffDB outperforms existing GPU-based databases
                 (YDB) and in-memory data analytics (MonetDB) by 1--2
                 orders of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zeuch:2016:NIP,
  author =       "Steffen Zeuch and Holger Pirk and Johann-Christoph
                 Freytag",
  title =        "Non-invasive progressive optimization for in-memory
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1659--1670",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Progressive optimization introduces robustness for
                 database workloads against wrong estimates, skewed
                 data, correlated attributes, or outdated statistics.
                 Previous work focuses on cardinality estimates and rely
                 on expensive counting methods as well as complex
                 learning algorithms. In this paper, we utilize
                 performance counters to drive progressive optimization
                 during query execution. The main advantages are that
                 performance counters introduce virtually no costs on
                 modern CPUs and their usage enables a non-invasive
                 monitoring. We present fine-grained cost models to
                 detect differences between estimates and actual costs
                 which enables us to kick-start reoptimization. Based on
                 our cost models, we implement an optimization approach
                 that estimates the individual selectivities of a
                 multi-selection query efficiently. Furthermore, we are
                 able to learn properties like sortedness, skew, or
                 correlation during run-time. In our evaluation we show,
                 that the overhead of our approach is negligible, while
                 performance improvements are convincing. Using
                 progressive optimization, we improve runtime up to a
                 factor of three compared to average run-times and up to
                 a factor of 4.5 compared to worst case run-times. As a
                 result, we avoid costly operator execution orders and;
                 thus, making query execution highly robust.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2016:DSS,
  author =       "J. W. Zhang and Y. C. Tay",
  title =        "{Dscaler}: synthetically scaling a given relational
                 database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1671--1682",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Dataset Scaling Problem (DSP) defined in previous
                 work states: Given an empirical set of relational
                 tables $D$ and a scale factor $s$, generate a database
                 state $ \tilde D $ that is similar to $D$ but $s$ times its
                 size. A DSP solution is useful for application
                 development $ (s < 1) $, scalability testing $ (s > 1)
                 $ and anonymization $ (s = 1) $. Current solutions
                 assume all table sizes scale by the same ratio $s$.
                 However, a real database tends to have tables that grow
                 at different rates. This paper therefore considers
                 non-uniform scaling (nuDSP), a DSP generalization
                 where, instead of a single scale factor $s$, tables can
                 scale by different factors. Dscaler is the first
                 solution for nuDSP. It follows previous work in
                 achieving similarity by reproducing correlation among
                 the primary and foreign keys. However, it introduces
                 the concept of a correlation database that captures
                 fine-grained, per-tuple correlation. Experiments with
                 well-known real and synthetic datasets $D$ show that
                 Dscaler produces $ \tilde D $ with greater similarity to $D$
                 than state-of-the-art techniques. Here, similarity is
                 measured by number of tuples, frequency distribution of
                 foreign key references, and multi-join aggregate
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2016:FAI,
  author =       "Sheng Wang and David Maier and Beng Chin Ooi",
  title =        "Fast and adaptive indexing of multi-dimensional
                 observational data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1683--1694",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Sensing devices generate tremendous amounts of data
                 each day, which include large quantities of
                 multi-dimensional measurements. These data are expected
                 to be immediately available for real-time analytics as
                 they are streamed into storage. Such scenarios pose
                 challenges to state-of-the-art indexing methods, as
                 they must not only support efficient queries but also
                 frequent updates. We propose here a novel indexing
                 method that ingests multi-dimensional observational
                 data in real time. This method primarily guarantees
                 extremely high throughput for data ingestion, while it
                 can be continuously refined in the background to
                 improve query efficiency. Instead of representing
                 collections of points using Minimal Bounding Boxes as
                 in conventional indexes, we model sets of successive
                 points as line segments in hyperspaces, by exploiting
                 the intrinsic value continuity in observational data.
                 This representation reduces the number of index entries
                 and drastically reduces ``over-coverage'' by entries.
                 Experimental results show that our approach handles
                 real-world workloads gracefully, providing both
                 low-overhead indexing and excellent query efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Upadhyaya:2016:POQ,
  author =       "Prasang Upadhyaya and Magdalena Balazinska and Dan
                 Suciu",
  title =        "Price-optimal querying with data {APIs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1695--1706",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data is increasingly being purchased online in data
                 markets and REST APIs have emerged as a favored method
                 to acquire such data. Typically, sellers charge buyers
                 based on how much data they purchase. In many
                 scenarios, buyers need to make repeated calls to the
                 seller's API. The challenge is then for buyers to keep
                 track of the data they purchase and avoid purchasing
                 the same data twice. In this paper, we propose
                 lightweight modifications to data APIs to achieve
                 optimal history-aware pricing so that buyers are only
                 charged once for data that they have purchased and that
                 has not been updated. The key idea behind our approach
                 is the notion of refunds: buyers buy data as needed but
                 have the ability to ask for refunds of data that they
                 had already purchased before. We show that our
                 techniques can provide significant data cost savings
                 while reducing overheads by two orders of magnitude as
                 compared to the state-of-the-art competing
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pirk:2016:VVA,
  author =       "Holger Pirk and Oscar Moll and Matei Zaharia and Sam
                 Madden",
  title =        "{Voodoo} --- a vector algebra for portable database
                 performance on modern hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1707--1718",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In-memory databases require careful tuning and many
                 engineering tricks to achieve good performance. Such
                 database performance engineering is hard: a plethora of
                 data and hardware-dependent optimization techniques
                 form a design space that is difficult to navigate for a
                 skilled engineer --- even more so for a query compiler.
                 To facilitate performance-oriented design exploration
                 and query plan compilation, we present Voodoo, a
                 declarative intermediate algebra that abstracts the
                 detailed architectural properties of the hardware, such
                 as multi- or many-core architectures, caches and SIMD
                 registers, without losing the ability to generate
                 highly tuned code. Because it consists of a collection
                 of declarative, vector-oriented operations, Voodoo is
                 easier to reason about and tune than low-level C and
                 related hardware-focused extensions (Intrinsics,
                 OpenCL, CUDA, etc.). This enables our Voodoo compiler
                 to produce (OpenCL) code that rivals and even
                 outperforms the fastest state-of-the-art in memory
                 databases for both GPUs and CPUs. In addition, Voodoo
                 makes it possible to express techniques as diverse as
                 cache-conscious processing, predication and
                 vectorization (again on both GPUs and CPUs) with just a
                 few lines of code. Central to our approach is a novel
                 idea we termed control vectors, which allows a code
                 generating frontend to expose parallelism to the Voodoo
                 compiler in an abstract manner, enabling portable
                 performance across hardware platforms. We used Voodoo
                 to build an alternative backend for MonetDB, a popular
                 open-source in-memory database. Our backend allows
                 MonetDB to perform at the same level as highly tuned
                 in-memory databases, including HyPeR and Ocelot. We
                 also demonstrate Voodoo's usefulness when investigating
                 hardware conscious tuning techniques, assessing their
                 performance on different queries, devices and data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2016:CQP,
  author =       "Dawei Jiang and Qingchao Cai and Gang Chen and H. V.
                 Jagadish and Beng Chin Ooi and Kian-Lee Tan and Anthony
                 K. H. Tung",
  title =        "Cohort query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "1",
  pages =        "1--12",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3015270.3015271",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:50 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern Internet applications often produce a large
                 volume of user activity records. Data analysts are
                 interested in cohort analysis, or finding unusual user
                 behavioral trends, in these large tables of activity
                 records. In a traditional database system, cohort
                 analysis queries are both painful to specify and
                 expensive to evaluate. We propose to extend database
                 systems to support cohort analysis. We do so by
                 extending SQL with three new operators. We devise three
                 different evaluation schemes for cohort query
                 processing. Two of them adopt a non-intrusive approach.
                 The third approach employs a columnar based evaluation
                 scheme with optimizations specifically designed for
                 cohort query processing. Our experimental results
                 confirm the performance benefits of our proposed
                 columnar database system, compared against the two
                 non-intrusive approaches that implement cohort queries
                 on top of regular relational databases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2016:RWY,
  author =       "Yubao Wu and Yuchen Bian and Xiang Zhang",
  title =        "Remember where you came from: on the second-order
                 random walk based proximity measures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "1",
  pages =        "13--24",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3015270.3015272",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:50 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Measuring the proximity between different nodes is a
                 fundamental problem in graph analysis. Random walk
                 based proximity measures have been shown to be
                 effective and widely used. Most existing random walk
                 measures are based on the first-order Markov model,
                 i.e., they assume that the next step of the random
                 surfer only depends on the current node. However, this
                 assumption neither holds in many real-life applications
                 nor captures the clustering structure in the graph. To
                 address the limitation of the existing first-order
                 measures, in this paper, we study the second-order
                 random walk measures, which take the previously visited
                 node into consideration. While the existing first-order
                 measures are built on node-to-node transition
                 probabilities, in the second-order random walk, we need
                 to consider the edge-to-edge transition probabilities.
                 Using incidence matrices, we develop simple and elegant
                 matrix representations for the second-order proximity
                 measures. A desirable property of the developed
                 measures is that they degenerate to their original
                 first-order forms when the effect of the previous step
                 is zero. We further develop Monte Carlo methods to
                 efficiently compute the second-order measures and
                 provide theoretical performance guarantees.
                 Experimental results show that in a variety of
                 applications, the second-order measures can
                 dramatically improve the performance compared to their
                 first-order counterparts.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{George:2016:MIL,
  author =       "Lars George and Bruno Cadonna and Matthias Weidlich",
  title =        "{IL-Miner}: instance-level discovery of complex event
                 patterns",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "1",
  pages =        "25--36",
  month =        sep,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3015270.3015273",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:50 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Complex event processing (CEP) matches patterns over a
                 continuous stream of events to detect situations of
                 interest. Yet, the definition of an event pattern that
                 precisely characterises a particular situation is
                 challenging: there are manifold dimensions to correlate
                 events, including time windows and value predicates. In
                 the presence of historic event data that is labelled
                 with the situation to detect, event patterns can be
                 learned automatically. To cope with the combinatorial
                 explosion of pattern candidates, existing approaches
                 work on a type-level and discover patterns based on
                 predefined event abstractions, aka event types. Hence,
                 discovery is limited to patterns of a fixed granularity
                 and users face the burden to manually select
                 appropriate event abstractions. We present IL-Miner, a
                 system that discovers event patterns by genuinely
                 working on the instance-level, not assuming a priori
                 knowledge on event abstractions. In a multi-phase
                 process, IL-Miner first identifies relevant
                 abstractions for the construction of event patterns.
                 The set of events explored for pattern discovery is
                 thereby reduced, while still providing formal
                 guarantees on correctness, minimality, and completeness
                 of the discovery result. Experiments using real-world
                 datasets from diverse domains show that IL-Miner
                 discovers a much broader range of event patterns
                 compared to the state-of-the-art in the field.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Psaroudakis:2016:ANA,
  author =       "Iraklis Psaroudakis and Tobias Scheuer and Norman May
                 and Abdelkader Sellami and Anastasia Ailamaki",
  title =        "Adaptive {NUMA}-aware data placement and task
                 scheduling for analytical workloads in main-memory
                 column-stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "2",
  pages =        "37--48",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Non-uniform memory access (NUMA) architectures pose
                 numerous performance challenges for main-memory
                 column-stores in scaling up analytics on modern
                 multi-socket multi-core servers. A NUMA-aware execution
                 engine needs a strategy for data placement and task
                 scheduling that prefers fast local memory accesses over
                 remote memory accesses, and avoids an imbalance of
                 resource utilization, both CPU and memory bandwidth,
                 across sockets. State-of-the-art systems typically use
                 a static strategy that always partitions data across
                 sockets, and always allows inter-socket task stealing.
                 In this paper, we show that adapting data placement and
                 task stealing to the workload can improve throughput by
                 up to a factor of 4 compared to a static approach. We
                 focus on highly concurrent workloads dominated by
                 operators working on a single table or table group
                 (copartitioned tables). Our adaptive data placement
                 algorithm tracks the resource utilization of tasks,
                 partitions of tables and table groups, and sockets.
                 When a utilization imbalance across sockets is
                 detected, the algorithm corrects it by moving or
                 repartitioning tables. Also, inter-socket task stealing
                 is dynamically disabled for memory-intensive tasks that
                 could otherwise hurt performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2016:MOC,
  author =       "Tianzheng Wang and Hideaki Kimura",
  title =        "Mostly-optimistic concurrency control for highly
                 contended dynamic workloads on a thousand cores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "2",
  pages =        "49--60",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Future servers will be equipped with thousands of CPU
                 cores and deep memory hierarchies. Traditional
                 concurrency control (CC) schemes---both optimistic and
                 pessimistic---slow down orders of magnitude in such
                 environments for highly contended workloads. Optimistic
                 CC (OCC) scales the best for workloads with few
                 conflicts, but suffers from clobbered reads for high
                 conflict workloads. Although pessimistic locking can
                 protect reads, it floods cache-coherence backbones in
                 deep memory hierarchies and can also cause numerous
                 deadlock aborts. This paper proposes a new CC scheme,
                 mostly-optimistic concurrency control (MOCC), to
                 address these problems. MOCC achieves orders of
                 magnitude higher performance for dynamic workloads on
                 modern servers. The key objective of MOCC is to avoid
                 clobbered reads for high conflict workloads, without
                 any centralized mechanisms or heavyweight interthread
                 communication. To satisfy such needs, we devise a
                 native, cancellable reader-writer spinlock and a
                 serializable protocol that can acquire, release and
                 re-acquire locks in any order without expensive
                 interthread communication. For low conflict workloads,
                 MOCC maintains OCC's high performance without taking
                 read locks. Our experiments with high conflict YCSB
                 workloads on a 288-core server reveal that MOCC
                 performs $ 8 \times $ and $ 23 \times $ faster than OCC
                 and pessimistic locking, respectively. It achieves 17
                 million TPS for TPC-C and more than 110 million TPS for
                 YCSB without conflicts, $ 170 \times $ faster than
                 pessimistic methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2016:EIA,
  author =       "Sibo Wang and Xiaokui Xiao and Yin Yang and Wenqing
                 Lin",
  title =        "Effective indexing for approximate constrained
                 shortest path queries on large road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "2",
  pages =        "61--72",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In a constrained shortest path (CSP) query, each edge
                 in the road network is associated with both a length
                 and a cost. Given an origin $s$, a destination $t$, and
                 a cost constraint $ \theta $, the goal is to find the
                 shortest path from $s$ to $t$ whose total cost does not
                 exceed $ \theta $. Because exact CSP is NP-hard,
                 previous work mostly focuses on approximate solutions.
                 Even so, existing methods are still prohibitively
                 expensive for large road networks. Two main reasons are
                 (i) that they fail to utilize the special properties of
                 road networks and (ii) that most of them process
                 queries without indices; the few existing indices
                 consume large amounts of memory and yet have limited
                 effectiveness in reducing query costs. Motivated by
                 this, we propose COLA, the first practical solution for
                 approximate CSP processing on large road networks. COLA
                 exploits the facts that a road network can be
                 effectively partitioned, and that there exists a
                 relatively small set of landmark vertices that commonly
                 appear in CSP results. Accordingly, COLA indexes the
                 vertices lying on partition boundaries, and applies an
                 on-the-fly algorithm called $ \alpha $-Dijk for path
                 computation within a partition, which effectively
                 prunes paths based on landmarks. Extensive experiments
                 demonstrate that on continent-sized road networks, COLA
                 answers an approximate CSP query in sub-second time,
                 whereas existing methods take hours. Interestingly,
                 even without an index, the $ \alpha $-Dijk algorithm in
                 COLA still outperforms previous solutions by more than
                 an order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2016:THP,
  author =       "Qun Huang and Patrick P. C. Lee",
  title =        "Toward high-performance distributed stream processing
                 via approximate fault tolerance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "73--84",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Fault tolerance is critical for distributed stream
                 processing systems, yet achieving error-free fault
                 tolerance often incurs substantial performance
                 overhead. We present AF-Stream, a distributed stream
                 processing system that addresses the trade-off between
                 performance and accuracy in fault tolerance. AF-Stream
                 builds on a notion called approximate fault tolerance,
                 whose idea is to mitigate backup overhead by adaptively
                 issuing backups, while ensuring that the errors upon
                 failures are bounded with theoretical guarantees. Our
                 AF-Stream design provides an extensible programming
                 model for incorporating general streaming algorithms,
                 and also exports only few threshold parameters for
                 configuring approximation fault tolerance. Experiments
                 on Amazon EC2 show that AF-Stream maintains high
                 performance (compared to no fault tolerance) and high
                 accuracy after multiple failures (compared to no
                 failures) under various streaming algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dai:2016:PCD,
  author =       "Jian Dai and Bin Yang and Chenjuan Guo and Christian
                 S. Jensen and Jilin Hu",
  title =        "Path cost distribution estimation using trajectory
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "85--96",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the growing volumes of vehicle trajectory data,
                 it becomes increasingly possible to capture
                 time-varying and uncertain travel costs in a road
                 network, including travel time and fuel consumption.
                 The current paradigm represents a road network as a
                 weighted graph; it blasts trajectories into small
                 fragments that fit the underlying edges to assign
                 weights to edges; and it then applies a routing
                 algorithm to the resulting graph. We propose a new
                 paradigm, the hybrid graph, that targets more accurate
                 and more efficient path cost distribution estimation.
                 The new paradigm avoids blasting trajectories into
                 small fragments and instead assigns weights to paths
                 rather than simply to the edges. We show how to compute
                 path weights using trajectory data while taking into
                 account the travel cost dependencies among the edges in
                 the paths. Given a departure time and a query path, we
                 show how to select an optimal set of weights with
                 associated paths that cover the query path and such
                 that the weights enable the most accurate joint cost
                 distribution estimation for the query path. The cost
                 distribution of the query path is then computed
                 accurately using the joint distribution. Finally, we
                 show how the resulting method for computing cost
                 distributions of paths can be integrated into existing
                 routing algorithms. Empirical studies with substantial
                 trajectory data from two different cities offer insight
                 into the design properties of the proposed method and
                 confirm that the method is effective in real-world
                 settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sariyuce:2016:FHC,
  author =       "Ahmet Erdem Sariy{\"u}ce and Ali P{\i}nar",
  title =        "Fast hierarchy construction for dense subgraphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "97--108",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Discovering dense subgraphs and understanding the
                 relations among them is a fundamental problem in graph
                 mining. We want to not only identify dense subgraphs,
                 but also build a hierarchy among them (e.g., larger but
                 sparser subgraphs formed by two smaller dense
                 subgraphs). Peeling algorithms (k-core, k-truss, and
                 nucleus decomposition) have been effective to locate
                 many dense subgraphs. However, constructing a
                 hierarchical representation of density structure, even
                 correctly computing the connected k-cores and
                 k-trusses, have been mostly overlooked. Keeping track of
                 connected components during peeling requires an
                 additional traversal operation, which is as expensive
                 as the peeling process. In this paper, we start with a
                 thorough survey and point to nuances in problem
                 formulations that lead to significant differences in
                 runtimes. We then propose efficient and generic
                 algorithms to construct the hierarchy of dense
                 subgraphs for k-core, k-truss, or any nucleus
                 decomposition. Our algorithms leverage the disjoint-set
                 forest data structure to efficiently construct the
                 hierarchy during traversal. Furthermore, we introduce a
                 new idea to avoid traversal. We construct the subgraphs
                 while visiting neighborhoods in the peeling process,
                 and build the relations to previously constructed
                 subgraphs. We also consider an existing idea to find
                 the k-core hierarchy and adapt for our objectives
                 efficiently. Experiments on different types of large
                 scale real-world networks show significant speedups
                 over naive algorithms and existing alternatives. Our
                 algorithms also outperform the hypothetical limits of
                 any possible traversal-based solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2016:SEE,
  author =       "Xuhong Zhang and Jun Wang and Jiangling Yin",
  title =        "{Sapprox}: enabling efficient and accurate
                 approximations on sub-datasets with distribution-aware
                 online sampling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "109--120",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we aim to enable both efficient and
                 accurate approximations on arbitrary sub-datasets of a
                 large dataset. Due to the prohibitive storage overhead
                 of caching offline samples for each sub-dataset,
                 existing offline sample based systems provide high
                 accuracy results for only a limited number of
                 sub-datasets, such as the popular ones. On the other
                 hand, current online sample based approximation
                 systems, which generate samples at runtime, do not take
                 into account the uneven storage distribution of a
                 sub-dataset. They work well for uniform distribution of
                 a sub-dataset while suffer low sampling efficiency and
                 poor estimation accuracy on unevenly distributed
                 sub-datasets. To address the problem, we develop a
                 distribution aware method called Sapprox. Our idea is
                 to collect the occurrences of a sub-dataset at each
                 logical partition of a dataset (storage distribution)
                 in the distributed system, and make good use of such
                 information to facilitate online sampling. There are
                 three thrusts in Sapprox. First, we develop a
                 probabilistic map to reduce the exponential number of
                 recorded sub-datasets to a linear one. Second, we apply
                 the cluster sampling with unequal probability theory to
                 implement a distribution-aware sampling method for
                 efficient online sub-dataset sampling. Third, we
                 quantitatively derive the optimal sampling unit size in
                 a distributed file system by associating it with
                 approximation costs and accuracy. We have implemented
                 Sapprox into Hadoop ecosystem as an example system and
                 open sourced it on GitHub. Our comprehensive
                 experimental results show that Sapprox can achieve a
                 speedup by up to $ 20 \times $ over the precise
                 execution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2016:MQO,
  author =       "Xuguang Ren and Junhu Wang",
  title =        "Multi-query optimization for subgraph isomorphism
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "121--132",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Existing work on subgraph isomorphism search mainly
                 focuses on a-query-at-a-time approaches: optimizing and
                 answering each query separately. When multiple queries
                 arrive at the same time, sequential processing is not
                 always the most efficient. In this paper, we study
                 multi-query optimization for subgraph isomorphism
                 search. We first propose a novel method for efficiently
                 detecting useful common sub-graphs and a data structure
                 to organize them. Then we propose a heuristic algorithm
                 based on the data structure to compute a query
                 execution order so that cached intermediate results can
                 be effectively utilized. To balance memory usage and
                 the time for cached results retrieval, we present a
                 novel structure for caching the intermediate results.
                 We provide strategies to revise existing single-query
                 subgraph isomorphism algorithms to seamlessly utilize
                 the cached results, which leads to significant
                 performance improvement. Extensive experiments verified
                 the effectiveness of our solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Simpson:2016:ECF,
  author =       "Michael Simpson and Venkatesh Srinivasan and Alex
                 Thomo",
  title =        "Efficient computation of feedback arc set at
                 web-scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "133--144",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The minimum feedback arc set problem is an NP-hard
                 problem on graphs that seeks a minimum set of arcs
                 which, when removed from the graph, leave it acyclic.
                 In this work, we investigate several approximations for
                 computing a minimum feedback arc set with the goal of
                 comparing the quality of the solutions and the running
                 times. Our investigation is motivated by applications
                 in Social Network Analysis such as misinformation
                 removal and label propagation. We present careful
                 algorithmic engineering for multiple algorithms to
                 improve the scalability of each approach. In
                 particular, two approaches we optimize (one greedy and
                 one randomized) provide a nice balance between feedback
                 arc set size and running time complexity. We
                 experimentally compare the performance of a wide range
                 of algorithms on a broad selection of large online
                 networks including Twitter, LiveJournal, and the
                 ClueWeb12 dataset. The experiments reveal that our
                 greedy and randomized implementations outperform the
                 other approaches by simultaneously computing a feedback
                 arc set of competitive size and scaling to web-scale
                 graphs with billions of vertices and tens of billions
                 of arcs. Finally, we extend the algorithms considered
                 to the probabilistic case in which arcs are realized
                 with some fixed probability and provide detailed
                 experimental comparisons.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Antenucci:2016:DQP,
  author =       "Dolan Antenucci and Michael R. Anderson and Michael
                 Cafarella",
  title =        "A declarative query processing system for nowcasting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "145--156",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nowcasting is the practice of using social media data
                 to quantify ongoing real-world phenomena. It has been
                 used by researchers to measure flu activity,
                 unemployment behavior, and more. However, the typical
                 nowcasting workflow requires either slow and tedious
                 manual searching of relevant social media messages or
                 automated statistical approaches that are prone to
                 spurious and low-quality results. In this paper, we
                 propose a method for declaratively specifying a
                 nowcasting model; this method involves processing a
                 user query over a very large social media database,
                 which can take hours. Due to the human-in-the-loop
                 nature of constructing nowcasting models, slow runtimes
                 place an extreme burden on the user. Thus we also
                 propose a novel set of query optimization techniques,
                 which allow users to quickly construct nowcasting
                 models over very large datasets. Further, we propose a
                 novel query quality alarm that helps users estimate
                 phenomena even when historical ground truth data is not
                 available. These contributions allow us to build a
                 declarative nowcasting data management system,
                 RaccoonDB, which yields high-quality results in
                 interactive time. We evaluate RaccoonDB using 40
                 billion tweets collected over five years. We show that
                 our automated system saves work over traditional manual
                 approaches while improving result quality---57\% more
                 accurate in our user study---and that its query
                 optimizations yield a $ 424 \times $ speedup, allowing
                 it to process queries $ 123 \times $ faster than a
                 300-core Spark
                 cluster, using only 10\% of the computational
                 resources.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lulli:2016:NDS,
  author =       "Alessandro Lulli and Matteo Dell'Amico and Pietro
                 Michiardi and Laura Ricci",
  title =        "{NG-DBSCAN}: scalable density-based clustering for
                 arbitrary data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "157--168",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present NG-DBSCAN, an approximate density-based
                 clustering algorithm that operates on arbitrary data
                 and any symmetric distance measure. The distributed
                 design of our algorithm makes it scalable to very large
                 datasets; its approximate nature makes it fast, yet
                 capable of producing high quality clustering results.
                 We provide a detailed overview of the steps of
                 NG-DBSCAN, together with their analysis. Our results,
                 obtained through an extensive experimental campaign
                 with real and synthetic data, substantiate our claims
                 about NG-DBSCAN's performance and scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Neamtu:2016:ITS,
  author =       "Rodica Neamtu and Ramoza Ahsan and Elke Rundensteiner
                 and G{\'a}bor S{\'a}rk{\"o}zy",
  title =        "Interactive time series exploration powered by the
                 marriage of similarity distances",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "169--180",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding similar trends among time series data is
                 critical for applications ranging from financial
                 planning to policy making. The detection of these
                 multifaceted relationships, especially time warped
                 matching of time series of different lengths and
                 alignments is prohibitively expensive to compute. To
                 achieve real time responsiveness on large time series
                 datasets, we propose a novel paradigm called Online
                 Exploration of Time Series (ONEX) employing a powerful
                 one-time preprocessing step that encodes critical
                 similarity relationships to support subsequent rapid
                 data exploration. Since the encoding of a huge number
                 of pairwise similarity relationships for all variable
                 lengths time series segments is not feasible, our work
                 rests on the important insight that clustering with
                 inexpensive point-to-point distances such as the
                 Euclidean Distance can support subsequent time warped
                 matching. Our ONEX framework overcomes the prohibitive
                 computational costs associated with a more robust
                 elastic distance namely the DTW by applying it over the
                 surprisingly compact knowledge base instead of the raw
                 data. Our comparative study reveals that ONEX is up to
                 19\% more accurate and several times faster than the
                 state-of-the-art. Beyond being a highly accurate and
                 fast domain independent solution, ONEX offers a truly
                 interactive exploration experience supporting novel
                 time series operations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2016:CLI,
  author =       "Youhuan Li and Lei Zou and Huaming Zhang and Dongyan
                 Zhao",
  title =        "Computing longest increasing subsequences over
                 sequential data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "181--192",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we propose a data structure, a
                 quadruple neighbor list (QN-list, for short), to
                 support real time queries of all longest increasing
                 subsequence (LIS) and LIS with constraints over
                 sequential data streams. The QN-List built by our
                 algorithm requires $ O(w) $ space, where $w$ is the time
                 window size. The running time for building the initial
                 QN-List takes $ O(w \log w) $ time. Applying the
                 QN-List, insertion of the new item takes $ O(\log w) $
                 time and deletion of the first item takes $ O(w) $
                 time. To the best of our knowledge, this is the first
                 work to support both LIS enumeration and LIS with
                 constraints computation by using a single uniform data
                 structure for real time sequential data streams. Our
                 method outperforms the state-of-the-art methods in both
                 time and space cost, not only theoretically, but also
                 empirically.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chirigati:2016:KEU,
  author =       "Fernando Chirigati and Jialu Liu and Flip Korn and You
                 (Will) Wu and Cong Yu and Hao Zhang",
  title =        "Knowledge exploration using tables on the web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "193--204",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The increasing popularity of mobile device usage has
                 ushered in many features in modern search engines that
                 help users with various information needs. One of those
                 needs is Knowledge Exploration, where related documents
                 are returned in response to a user query, either
                 directly through right-hand side knowledge panels or
                 indirectly through navigable sections underneath
                 individual search results. Existing knowledge
                 exploration features have relied on a combination of
                 Knowledge Bases and query logs. In this paper, we
                 propose Knowledge Carousels of two modalities, namely
                 sideways and downwards, that facilitate exploration of
                 IS-A and HAS-A relationships, respectively, with regard
                 to an entity-seeking query, based on leveraging the
                 large corpus of tables on the Web. This brings many
                 technical challenges, including associating correct
                 carousels with the search entity, selecting the best
                 carousel from the candidates, and finding titles that
                 best describe the carousel. We describe how we address
                 these challenges and also experimentally demonstrate
                 through user studies that our approach produces better
                 result sets than baseline approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2016:HEI,
  author =       "Sibo Wang and Youze Tang and Xiaokui Xiao and Yin Yang
                 and Zengxiang Li",
  title =        "{HubPPR}: effective indexing for approximate
                 {Personalized Pagerank}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "205--216",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Personalized PageRank (PPR) computation is a
                 fundamental operation in web search, social networks,
                 and graph analysis. Given a graph $G$, a source $s$,
                 and a target $t$, the PPR query $ \Pi (s, t)$ returns
                 the probability that a random walk on $G$ starting from
                 $s$ terminates at $t$. Unlike global PageRank which can
                 be effectively pre-computed and materialized, the PPR
                 result depends on both the source and the target,
                 rendering results materialization infeasible for large
                 graphs. Existing indexing techniques have rather
                 limited effectiveness; in fact, the current
                 state-of-the-art solution, BiPPR, answers individual
                 PPR queries without pre-computation or indexing, and
                 yet it outperforms all previous index-based solutions.
                 Motivated by this, we propose HubPPR, an effective
                 indexing scheme for PPR computation with controllable
                 tradeoffs for accuracy, query time, and memory
                 consumption. The main idea is to pre-compute and index
                 auxiliary information for selected hub nodes that are
                 often involved in PPR processing. Going one step
                 further, we extend HubPPR to answer top-$k$ PPR
                 queries, which returns the $k$ nodes with the highest
                 PPR values with respect to a source $s$, among a given
                 set $T$ of target nodes. Extensive experiments
                 demonstrate that compared to the current best solution
                 BiPPR, HubPPR achieves up to 10x and 220x speedup for
                 PPR and top-$k$ PPR processing, respectively, with
                 moderate memory consumption. Notably, with a single
                 commodity server, HubPPR answers a top-$k$ PPR query in
                 seconds on graphs with billions of edges, with high
                 accuracy and strong result quality guarantees.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lai:2016:SDS,
  author =       "Longbin Lai and Lu Qin and Xuemin Lin and Ying Zhang
                 and Lijun Chang and Shiyu Yang",
  title =        "Scalable distributed subgraph enumeration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "217--228",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Subgraph enumeration aims to find all the subgraphs of
                 a large data graph that are isomorphic to a given
                 pattern graph. As the subgraph isomorphism operation is
                 computationally intensive, researchers have recently
                 focused on solving this problem in distributed
                 environments, such as MapReduce and Pregel. Among them,
                 the state-of-the-art algorithm, Twin TwigJoin, is
                 proven to be instance optimal based on a left-deep join
                 framework. However, it is still not scalable to large
                 graphs because of the constraints in the left-deep join
                 framework and that each decomposed component (join
                 unit) must be a star. In this paper, we propose SEED
                 --- a scalable sub-graph enumeration approach in the
                 distributed environment. Compared to Twin TwigJoin,
                 SEED returns optimal solution in a generalized join
                 framework without the constraints in Twin TwigJoin. We
                 use both star and clique as the join units, and design
                 an effective distributed graph storage mechanism to
                 support such an extension. We develop a comprehensive
                 cost model, that estimates the number of matches of any
                 given pattern graph by considering power-law degree
                 distribution in the data graph. We then generalize the
                 left-deep join framework and develop a
                 dynamic-programming algorithm to compute an optimal
                 bushy join plan. We also consider overlaps among the
                 join units. Finally, we propose clique compression to
                 further improve the algorithm by reducing the number of
                 the intermediate results. Extensive performance studies
                 are conducted on several real graphs, one containing
                 billions of edges. The results demonstrate that our
                 algorithm outperforms all other state-of-the-art
                 algorithms by more than one order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fujiwara:2016:FAL,
  author =       "Yasuhiro Fujiwara and Yasutoshi Ida and Junya Arai and
                 Mai Nishimura and Sotetsu Iwamura",
  title =        "Fast algorithm for the lasso based {$ L_1 $}-graph
                 construction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "229--240",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The lasso-based $ L_1$-graph is used in many
                 applications since it can effectively model a set of
                 data points as a graph. The lasso is a popular
                  regression approach and the $ L_1$-graph represents
                 data points as nodes by using the regression result.
                 More specifically, by solving the $ L_1$-optimization
                 problem of the lasso, the sparse regression
                 coefficients are used to obtain the weights of the
                 edges in the graph. Conventional graph structures such
                  as $k$-NN graph use two steps, adjacency searching and
                 weight selection, for constructing the graph whereas
                  the lasso-based $ L_1$-graph derives the adjacency
                 structure as well as the edge weights simultaneously by
                 using a coordinate descent. However, the construction
                  cost of the lasso-based $ L_1$-graph is impractical
                 for large data sets since the coordinate descent
                 iteratively updates the weights of all edges until
                 convergence. Our proposal, Castnet, can efficiently
                  construct the lasso-based $ L_1$-graph. In order to
                 avoid updating the weights of all edges, we prune edges
                 that cannot have nonzero weights before entering the
                 iterations. In addition, we update edge weights only if
                 they are nonzero in the iterations. Experiments show
                 that Castnet is significantly faster than existing
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhai:2016:RTS,
  author =       "Ennan Zhai and Zhenhua Li and Zhenyu Li and Fan Wu and
                 Guihai Chen",
  title =        "Resisting tag spam by leveraging implicit user
                 behaviors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "241--252",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tagging systems are vulnerable to tag spam attacks.
                 However, defending against tag spam has been
                 challenging in practice, since adversaries can easily
                 launch spam attacks in various ways and scales. To
                 deeply understand users' tagging behaviors and explore
                 more effective defense, this paper first conducts
                 measurement experiments on public datasets of two
                 representative tagging systems: Del.icio.us and
                 CiteULike. Our key finding is that a significant
                 fraction of correct tag-resource annotations are
                 contributed by a small number of implicit similarity
                 cliques, where users annotate common resources with
                 similar tags. Guided by the above finding, we propose a
                 new service, called Spam-Resistance-as-a-Service (or
                 SRaaS), to effectively defend against heterogeneous tag
                 spam attacks even at very large scales. At the heart of
                 SRaaS is a novel reputation assessment protocol, whose
                 design leverages the implicit similarity cliques
                 coupled with the social networks inherent to typical
                 tagging systems. With such a design, SRaaS manages to
                 offer provable guarantees on diminishing the influence
                 of tag spam attacks. We build an SRaaS prototype and
                 evaluate it using a large-scale spam-oriented research
                 dataset (which is much more polluted by tag spam than
                 Del.icio.us and CiteULike datasets). Our evaluational
                 results demonstrate that SRaaS outperforms existing tag
                 spam defenses deployed in real-world systems, while
                 introducing low overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2016:GFE,
  author =       "Xiaowei Chen and Yongkun Li and Pinghui Wang and John
                 C. S. Lui",
  title =        "A general framework for estimating graphlet statistics
                 via random walk",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "253--264",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graphlets are induced subgraph patterns and have been
                 frequently applied to characterize the local topology
                 structures of graphs across various domains, e.g.,
                 online social networks (OSNs) and biological networks.
                 Discovering and computing graphlet statistics are
                 highly challenging. First, the massive size of
                 real-world graphs makes the exact computation of
                 graphlets extremely expensive. Secondly, the graph
                 topology may not be readily available so one has to
                 resort to web crawling using the available application
                 programming interfaces (APIs). In this work, we propose
                 a general and novel framework to estimate graphlet
                  statistics of ``any size.'' Our framework is based on
                 collecting samples through consecutive steps of random
                 walks. We derive an analytical bound on the sample size
                 (via the Chernoff--Hoeffding technique) to guarantee
                 the convergence of our unbiased estimator. To further
                 improve the accuracy, we introduce two novel
                 optimization techniques to reduce the lower bound on
                 the sample size. Experimental evaluations demonstrate
                 that our methods outperform the state-of-the-art method
                 up to an order of magnitude both in terms of accuracy
                 and time cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lin:2016:FMS,
  author =       "Chunbin Lin and Benjamin Mandel and Yannis
                 Papakonstantinou and Matthias Springer",
  title =        "Fast in-memory {SQL} analytics on typed graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "265--276",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study a class of graph analytics SQL queries, which
                 we call relationship queries. These queries involving
                 aggregation, join, semijoin, intersection and selection
                 are a wide superset of fixed-length graph reachability
                 queries and of tree pattern queries. We present
                 real-world OLAP scenarios, where efficient relationship
                 queries are needed. However, row stores, column stores
                 and graph databases are unacceptably slow in such OLAP
                 scenarios. We propose a GQ-Fast database, which is an
                 indexed database that roughly corresponds to efficient
                 encoding of annotated adjacency lists that combines
                 salient features of column-based organization, indexing
                 and compression. GQ-Fast uses a bottom-up fully
                 pipelined query execution model, which enables (a)
                 aggressive compression (e.g., compressed bitmaps and
                 Huffman) and (b) avoids intermediate results that
                 consist of row IDs (which are typical in column
                 databases). GQ-Fast compiles query plans into
                 executable C++ source code. Besides achieving runtime
                 efficiency, GQ-Fast also reduces main memory
                 requirements because, unlike column databases, GQ-Fast
                 selectively allows dense forms of compression including
                 heavy-weight compressions, which do not support random
                 access. We used GQ-Fast to accelerate queries for two
                 OLAP dashboards in the biomedical field. GQ-Fast
                 outperforms PostgreSQL by 2--4 orders of magnitude and
                 MonetDB, Vertica and Neo4j by 1--3 orders of magnitude
                 when all of them are running on RAM. Our experiments
                 dissect GQ-Fast's advantage between (i) the use of
                 compiled code, (ii) the bottom-up pipelining execution
                 strategy, and (iii) the use of dense structures. Other
                 analysis and experiments show the space savings of
                 GQ-Fast due to the appropriate use of compression
                 methods. We also show that the runtime penalty incurred
                 by the dense compression methods decreases as the
                 number of CPU cores increases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2016:SDA,
  author =       "Zheng Li and Tingjian Ge",
  title =        "Stochastic data acquisition for answering queries as
                 time goes by",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "3",
  pages =        "277--288",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Dec 1 09:02:03 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data and actions are tightly coupled. On one hand,
                 data analysis results trigger decision making and
                 actions. On the other hand, the action of acquiring
                 data is the very first step in the whole data
                 processing pipeline. Data acquisition almost always has
                 some costs, which could be either monetary costs or
                 computing resource costs such as sensor battery power,
                 network transfers, or I/O costs. Using out-dated data
                 to answer queries can avoid the data acquisition costs,
                 but there is a penalty of potentially inaccurate
                 results. Given a sequence of incoming queries over
                 time, we study the problem of sequential decision
                 making on when to acquire data and when to use existing
                 versions to answer each query. We propose two
                 approaches to solve this problem using reinforcement
                 learning and tailored locality-sensitive hashing. A
                 systematic empirical study using two real-world
                 datasets shows that our approaches are effective and
                 efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dai:2016:FPI,
  author =       "Haipeng Dai and Muhammad Shahzad and Alex X. Liu and
                 Yuankun Zhong",
  title =        "Finding persistent items in data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "289--300",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025112",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Frequent item mining, which deals with finding items
                 that occur frequently in a given data stream over a
                 period of time, is one of the heavily studied problems
                 in data stream mining. A generalized version of
                 frequent item mining is the persistent item mining,
                 where a persistent item, unlike a frequent item, does
                 not necessarily occur more frequently compared to other
                 items over a short period of time, rather persists and
                 occurs more frequently over a long period of time. To
                 the best of our knowledge, there is no prior work on
                 mining persistent items in a data stream. In this
                 paper, we address the fundamental problem of finding
                 persistent items in a given data stream during a given
                 period of time at any given observation point. We
                 propose a novel scheme, PIE, that can accurately
                 identify each persistent item with a probability
                 greater than any desired false negative rate (FNR)
                 while using a very small amount of memory. The key idea
                 of PIE is that it uses Raptor codes to encode the ID of
                 each item that appears at the observation point during
                 a measurement period and stores only a few bits of the
                 encoded ID in the memory of that observation point
                 during that measurement period. The item that is
                 persistent occurs in enough measurement periods that
                 enough encoded bits for the ID can be retrieved from
                 the observation point to decode them correctly and get
                 the ID of the persistent item. We implemented and
                 extensively evaluated PIE using three real network
                 traffic traces and compared its performance with two
                 prior adapted schemes. Our results show that not only
                 PIE achieves the desired FNR in every scenario, its
                 FNR, on average, is 19.5 times smaller than the FNR of
                 the best adapted prior art.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2016:BSD,
  author =       "Shuotao Xu and Sungjin Lee and Sang-Woo Jun and Ming
                 Liu and Jamey Hicks and Arvind",
  title =        "{Bluecache}: a scalable distributed flash-based
                 key--value store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "301--312",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025113",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A key--value store (KVS), such as memcached and Redis,
                 is widely used as a caching layer to augment the slower
                 persistent backend storage in data centers. DRAM-based
                 KVS provides fast key--value access, but its
                 scalability is limited by the cost, power and space
                 needed by the machine cluster to support a large amount
                 of DRAM. This paper offers a 10X to 100X cheaper
                 solution based on flash storage and hardware
                 accelerators. In BlueCache key--value pairs are stored
                 in flash storage and all KVS operations, including the
                 flash controller are directly implemented in hardware.
                 Furthermore, BlueCache includes a fast interconnect
                 between flash controllers to provide a scalable
                 solution. We show that BlueCache has 4.18X higher
                 throughput and consumes 25X less power than a
                 flash-backed KVS software implementation on x86
                 servers. We further show that BlueCache can outperform
                 DRAM-based KVS when the latter has more than 7.4\%
                  misses for a read-intensive application. BlueCache is an
                 attractive solution for both rack-level appliances and
                 data-center-scale key--value cache.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2016:GPP,
  author =       "Qi Fan and Dongxiang Zhang and Huayu Wu and Kian-Lee
                 Tan",
  title =        "A general and parallel platform for mining co-movement
                 patterns over large-scale trajectories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "313--324",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025114",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Discovering co-movement patterns from large-scale
                 trajectory databases is an important mining task and
                 has a wide spectrum of applications. Previous studies
                 have identified several types of interesting
                 co-movement patterns and show-cased their usefulness.
                 In this paper, we make two key contributions to this
                 research field. First, we propose a more general
                 co-movement pattern to unify those defined in the past
                 literature. Second, we propose two types of parallel
                 and scalable frameworks and deploy them on Apache
                 Spark. To the best of our knowledge, this is the first
                 work to mine co-movement patterns in real life
                 trajectory databases with hundreds of millions of
                 points. Experiments on three real life large-scale
                 trajectory datasets have verified the efficiency and
                 scalability of our proposed solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shao:2016:VTE,
  author =       "Zhou Shao and Muhammad Aamir Cheema and David Taniar
                 and Hua Lu",
  title =        "{VIP-Tree}: an effective index for indoor spatial
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "325--336",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025115",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Due to the growing popularity of indoor location-based
                 services, indoor data management has received
                 significant research attention in the past few years.
                 However, we observe that the existing indexing and
                 query processing techniques for the indoor space do not
                 fully exploit the properties of the indoor space.
                 Consequently, they provide below par performance which
                 makes them unsuitable for large indoor venues with high
                 query workloads. In this paper, we propose two novel
                 indexes called Indoor Partitioning Tree (IP-Tree) and
                 Vivid IP-Tree (VIP-Tree) that are carefully designed by
                 utilizing the properties of indoor venues. The proposed
                 indexes are lightweight, have small pre-processing cost
                 and provide near-optimal performance for shortest
                 distance and shortest path queries. We also present
                 efficient algorithms for other spatial queries such as
                 k nearest neighbors queries and range queries. Our
                 extensive experimental study on real and synthetic data
                 sets demonstrates that our proposed indexes outperform
                 the existing algorithms by several orders of
                 magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arulraj:2016:WBL,
  author =       "Joy Arulraj and Matthew Perron and Andrew Pavlo",
  title =        "Write-behind logging",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "337--348",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025116",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The design of the logging and recovery components of
                 database management systems (DBMSs) has always been
                 influenced by the difference in the performance
                 characteristics of volatile (DRAM) and non-volatile
                 storage devices (HDD/SSDs). The key assumption has been
                 that non-volatile storage is much slower than DRAM and
                 only supports block-oriented read/writes. But the
                 arrival of new non-volatile memory (NVM) storage that
                 is almost as fast as DRAM with fine-grained read/writes
                 invalidates these previous design choices. This paper
                 explores the changes that are required in a DBMS to
                 leverage the unique properties of NVM in systems that
                 still include volatile DRAM. We make the case for a new
                 logging and recovery protocol, called write-behind
                 logging, that enables a DBMS to recover nearly
                 instantaneously from system failures. The key idea is
                 that the DBMS logs what parts of the database have
                 changed rather than how it was changed. Using this
                  method, the DBMS flushes the changes to the database
                  {before} recording them in the log. Our evaluation shows
                 that this protocol improves a DBMS's transactional
                 throughput by 1.3$ \times $, reduces the recovery time
                 by more than two orders of magnitude, and shrinks the
                 storage footprint of the DBMS on NVM by 1.5$ \times $.
                 We also demonstrate that our logging protocol is
                 compatible with standard replication schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Papadopoulos:2016:TAD,
  author =       "Stavros Papadopoulos and Kushal Datta and Samuel
                 Madden and Timothy Mattson",
  title =        "The {TileDB} array data storage manager",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "349--360",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025117",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a novel storage manager for
                 multi-dimensional arrays that arise in scientific
                 applications, which is part of a larger scientific data
                 management system called TileDB. In contrast to
                 existing solutions, TileDB is optimized for both dense
                 and sparse arrays. Its key idea is to organize array
                 elements into ordered collections called fragments.
                 Each fragment is dense or sparse, and groups contiguous
                 array elements into data tiles of fixed capacity. The
                 organization into fragments turns random writes into
                 sequential writes, and, coupled with a novel read
                 algorithm, leads to very efficient reads. TileDB
                 enables parallelization via multi-threading and
                 multi-processing, offering thread-/process-safety and
                 atomicity via lightweight locking. We show that TileDB
                 delivers comparable performance to the HDF5 dense array
                 storage manager, while providing much faster random
                 writes. We also show that TileDB offers substantially
                 faster reads and writes than the SciDB array database
                 system with both dense and sparse arrays. Finally, we
                 demonstrate that TileDB is considerably faster than
                 adaptations of the Vertica relational column-store for
                 dense array storage management, and at least as fast
                 for the case of sparse arrays.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zheng:2016:DDA,
  author =       "Yudian Zheng and Guoliang Li and Reynold Cheng",
  title =        "{DOCS}: a domain-aware crowdsourcing system using
                 knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "361--372",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025118",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Crowdsourcing is a new computing paradigm that
                 harnesses human effort to solve computer-hard problems,
                 such as entity resolution and photo tagging. The crowd
                 (or workers) have diverse qualities and it is important
                 to effectively model a worker's quality. Most of
                 existing worker models assume that workers have the
                 same quality on different tasks. In practice, however,
                 tasks belong to a variety of diverse domains, and
                 workers have different qualities on different domains.
                 For example, a worker who is a basketball fan should
                 have better quality for the task of labeling a photo
                 related to ``Stephen Curry'' than the one related to
                 ``Leonardo DiCaprio''. In this paper, we study how to
                 leverage domain knowledge to accurately model a
                 worker's quality. We examine using knowledge base (KB),
                 e.g., Wikipedia and Freebase, to detect the domains of
                 tasks and workers. We develop Domain Vector Estimation,
                 which analyzes the domains of a task with respect to
                 the KB. We also study Truth Inference, which utilizes
                 the domain-sensitive worker model to accurately infer
                 the true answer of a task. We design an Online Task
                 Assignment algorithm, which judiciously and efficiently
                 assigns tasks to appropriate workers. To implement
                 these solutions, we have built DOCS, a system deployed
                 on the Amazon Mechanical Turk. Experiments show that
                 DOCS performs much better than the state-of-the-art
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2016:LHC,
  author =       "Yue Wang and Alexandra Meliou and Gerome Miklau",
  title =        "Lifting the haze off the cloud: a consumer-centric
                 market for database computation in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "373--384",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025119",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The availability of public computing resources in the
                 cloud has revolutionized data analysis, but requesting
                 cloud resources often involves complex decisions for
                 consumers. Estimating the completion time and cost of a
                 computation and requesting the appropriate cloud
                 resources are challenging tasks even for an expert
                 user. We propose a new market-based framework for
                 pricing computational tasks in the cloud. Our framework
                 introduces an agent between consumers and cloud
                 providers. The agent takes data and computational tasks
                 from users, estimates time and cost for evaluating the
                 tasks, and returns to consumers contracts that specify
                 the price and completion time. Our framework can be
                 applied directly to existing cloud markets without
                 altering the way cloud providers offer and price
                 services. In addition, it simplifies cloud use for
                 consumers by allowing them to compare contracts, rather
                 than choose resources directly. We present design,
                 analytical, and algorithmic contributions focusing on
                 pricing computation contracts, analyzing their
                 properties, and optimizing them in complex workflows.
                 We conduct an experimental evaluation of our market
                 framework over a real-world cloud service and
                 demonstrate empirically that our market ensures three
                 key properties: (a) that consumers benefit from using
                 the market due to competitiveness among agents, (b)
                 that agents have an incentive to price contracts
                 fairly, and (c) that inaccuracies in estimates do not
                 pose a significant risk to agents' profits. Finally, we
                 present a fine-grained pricing mechanism for complex
                 workflows and show that it can increase agent profits
                 by more than an order of magnitude in some cases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yu:2016:TBO,
  author =       "Jia Yu and Mohamed Sarwat",
  title =        "Two birds, one stone: a fast, yet lightweight,
                 indexing scheme for modern database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "385--396",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025120",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Classic database indexes (e.g., B$^+$-Tree), though
                 speed up queries, suffer from two main drawbacks: (1)
                 An index usually yields 5\% to 15\% additional storage
                 overhead which results in non-ignorable dollar cost in
                 big data scenarios especially when deployed on modern
                 storage devices. (2) Maintaining an index incurs high
                 latency because the DBMS has to locate and update those
                 index pages affected by the underlying table changes.
                 This paper proposes Hippo a fast, yet scalable,
                 database indexing approach. It significantly shrinks
                 the index storage and mitigates maintenance overhead
                 without compromising much on the query execution
                 performance. Hippo stores disk page ranges instead of
                 tuple pointers in the indexed table to reduce the
                 storage space occupied by the index. It maintains
                 simplified histograms that represent the data
                 distribution and adopts a page grouping technique that
                 groups contiguous pages into page ranges based on the
                 similarity of their index key attribute distributions.
                 When a query is issued, Hippo leverages the page ranges
                 and histogram-based page summaries to recognize those
                 pages such that their tuples are guaranteed not to
                 satisfy the query predicates and inspects the remaining
                 pages. Experiments based on real and synthetic datasets
                 show that Hippo occupies up to two orders of magnitude
                 less storage space than that of the B$^+$-Tree while
                 still achieving comparable query execution performance
                 to that of the B$^+$-Tree for 0.1\%--1\%
                 selectivity factors. Also, the experiments show that
                 Hippo outperforms BRIN (Block Range Index) in executing
                 queries with various selectivity factors. Furthermore,
                 Hippo achieves up to three orders of magnitude less
                 maintenance overhead and up to an order of magnitude
                 higher throughput (for hybrid query/update workloads)
                 than its counterparts.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2016:HMF,
  author =       "Zheng Li and Tingjian Ge",
  title =        "History is a mirror to the future: best-effort
                 approximate complex event matching with insufficient
                 resources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "397--408",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025121",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Complex event processing (CEP) has proven to be a
                 highly relevant topic in practice. As it is sensitive
                 to both errors in the stream and uncertainty in the
                 pattern, approximate complex event processing (ACEP) is
                 an important direction but has not been adequately
                 studied before. ACEP is costly, and is often performed
                 under insufficient computing resources. We propose an
                 algorithm that learns from the past behavior of ACEP
                 runs, and makes decisions on what to process first in
                 an online manner, so as to maximize the number of full
                 matches found. In addition, we devise effective
                 optimization techniques. Finally, we propose a
                 mechanism that uses reinforcement learning to
                 dynamically update the history structure without
                 incurring much overhead. Put together, these techniques
                 drastically improve the fraction of full matches found
                 in resource constrained environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Avni:2016:PHT,
  author =       "Hillel Avni and Trevor Brown",
  title =        "Persistent hybrid transactional memory for databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "409--420",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025122",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Processors with hardware support for transactional
                 memory (HTM) are rapidly becoming commonplace, and
                 processor manufacturers are currently working on
                 implementing support for upcoming non-volatile memory
                 (NVM) technologies. The combination of HTM and NVM
                 promises to be a natural choice for in-memory database
                 synchronization. However, limitations on the size of
                 hardware transactions and the lack of progress
                 guarantees by modern HTM implementations prevent some
                 applications from obtaining the full benefit of
                 hardware transactional memory. In this paper, we
                 propose a persistent hybrid TM algorithm called PHyTM
                 for systems that support NVM and HTM. PHyTM allows
                 hardware assisted ACID transactions to execute
                 concurrently with pure software transactions, which
                 allows applications to gain the benefit of persistent
                 HTM while simultaneously accommodating unbounded
                 transactions (with a high degree of concurrency).
                 Experimental simulations demonstrate that PHyTM is fast
                 and scalable for realistic workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2016:SOP,
  author =       "Liwen Sun and Michael J. Franklin and Jiannan Wang and
                 Eugene Wu",
  title =        "Skipping-oriented partitioning for columnar layouts",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "421--432",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025123",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As data volumes continue to grow, modern database
                 systems increasingly rely on data skipping mechanisms
                 to improve performance by avoiding access to irrelevant
                 data. Recent work [39] proposed a fine-grained
                 partitioning scheme that was shown to improve the
                 opportunities for data skipping in row-oriented
                 systems. Modern analytics and big data systems
                 increasingly adopt columnar storage schemes, and in
                 such systems, a row-based approach misses important
                 opportunities for further improving data skipping. The
                 flexibility of column-oriented organizations, however,
                 comes with the additional cost of tuple reconstruction.
                 In this paper, we develop Generalized Skipping-Oriented
                 Partitioning (GSOP), a novel hybrid data skipping
                 framework that takes into account these row-based and
                 column-based tradeoffs. In contrast to previous
                 column-oriented physical design work, GSOP considers
                 the tradeoffs between horizontal data skipping and
                 vertical partitioning jointly. Our experiments using
                 two public benchmarks and a real-world workload show
                 that GSOP can significantly reduce the amount of data
                 scanned and improve end-to-end query response times
                 over the state-of-the-art techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Singh:2016:EQU,
  author =       "Sneha Aman Singh and Divesh Srivastava and Srikanta
                 Tirthapura",
  title =        "Estimating quantiles from the union of historical and
                 streaming data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "433--444",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025124",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern enterprises generate huge amounts of streaming
                 data, for example, micro-blog feeds, financial data,
                 network monitoring and industrial application
                 monitoring. While Data Stream Management Systems have
                 proven successful in providing support for real-time
                 alerting, many applications, such as network monitoring
                 for intrusion detection and real-time bidding, require
                 complex analytics over historical and real-time data
                 over the data streams. We present a new method to
                 process one of the most fundamental analytical
                 primitives, quantile queries, on the union of
                 historical and streaming data. Our method combines an
                 index on historical data with a memory-efficient sketch
                 on streaming data to answer quantile queries with
                 accuracy-resource tradeoffs that are significantly
                 better than current solutions that are based solely on
                 disk-resident indexes or solely on streaming
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Serafini:2016:CFG,
  author =       "Marco Serafini and Rebecca Taft and Aaron J. Elmore
                 and Andrew Pavlo and Ashraf Aboulnaga and Michael
                 Stonebraker",
  title =        "{Clay}: fine-grained adaptive partitioning for general
                 database schemas",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "445--456",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025125",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Transaction processing database management systems
                 (DBMSs) are critical for today's data-intensive
                 applications because they enable an organization to
                 quickly ingest and query new information. Many of these
                 applications exceed the capabilities of a single
                 server, and thus their database has to be deployed in a
                 distributed DBMS. The key factor affecting such a
                 system's performance is how the database is
                 partitioned. If the database is partitioned
                 incorrectly, the number of distributed transactions can
                 be high. These transactions have to synchronize their
                 operations over the network, which is considerably
                 slower and leads to poor performance. Previous work on
                 elastic database repartitioning has focused on a
                 certain class of applications whose database schema can
                 be represented in a hierarchical tree structure. But
                 many applications cannot be partitioned in this manner,
                 and thus are subject to distributed transactions that
                 impede their performance and scalability. In this
                 paper, we present a new on-line partitioning approach,
                 called Clay, that supports both tree-based schemas and
                 more complex ``general'' schemas with arbitrary foreign
                 key relationships. Clay dynamically creates blocks of
                 tuples to migrate among servers during repartitioning,
                 placing no constraints on the schema but taking care to
                 balance load and reduce the amount of data migrated.
                 Clay achieves this goal by including in each block a
                 set of hot tuples and other tuples co-accessed with
                 these hot tuples. To evaluate our approach, we
                 integrate Clay in a distributed, main-memory DBMS and
                 show that it can generate partitioning schemes that
                 enable the system to achieve up to 15$ \times $ better
                 throughput and 99\% lower latency than existing
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Siddiqui:2016:EDE,
  author =       "Tarique Siddiqui and Albert Kim and John Lee and
                 Karrie Karahalios and Aditya Parameswaran",
  title =        "Effortless data exploration with {zenvisage}: an
                 expressive and interactive visual analytics system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "4",
  pages =        "457--468",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3025111.3025126",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data visualization is by far the most commonly used
                 mechanism to explore and extract insights from
                 datasets, especially by novice data scientists. And
                 yet, current visual analytics tools are rather limited
                 in their ability to operate on collections of
                 visualizations---by composing, filtering, comparing,
                 and sorting them---to find those that depict desired
                 trends or patterns. The process of visual data
                 exploration remains a tedious process of
                 trial-and-error. We propose zenvisage, a visual
                 analytics platform for effortlessly finding desired
                 visual patterns from large datasets. We introduce
                 zenvisage's general purpose visual exploration
                 language, ZQL (``zee-quel'') for specifying the desired
                 visual patterns, drawing from use-cases in a variety of
                 domains, including biology, mechanical engineering,
                 climate science, and commerce. We formalize the
                 expressiveness of ZQL via a visual exploration
                 algebra---an algebra on collections of
                 visualizations---and demonstrate that ZQL is as
                 expressive as that algebra. zenvisage exposes an
                 interactive front-end that supports the issuing of ZQL
                 queries, and also supports interactions that are
                 ``short-cuts'' to certain commonly used ZQL queries. To
                 execute these queries, zenvisage uses a novel ZQL
                 graph-based query optimizer that leverages a suite of
                 optimizations tailored to the goal of processing
                 collections of visualizations in certain pre-defined
                 ways. Lastly, a user survey and study demonstrates that
                 data scientists are able to effectively use zenvisage
                 to eliminate error-prone and tedious exploration and
                 directly identify desired visualizations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ceccarello:2017:MSA,
  author =       "Matteo Ceccarello and Andrea Pietracaprina and Geppino
                 Pucci and Eli Upfal",
  title =        "{MapReduce} and streaming algorithms for diversity
                 maximization in metric spaces of bounded doubling
                 dimension",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "469--480",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a dataset of points in a metric space and an
                 integer $k$, a diversity maximization problem requires
                 determining a subset of $k$ points maximizing some
                 diversity objective measure, e.g., the minimum or the
                 average distance between two points in the subset.
                 Diversity maximization is computationally hard, hence
                 only approximate solutions can be hoped for. Although
                 its applications are mainly in massive data analysis,
                 most of the past research on diversity maximization
                 focused on the sequential setting. In this work we
                 present space and pass/round-efficient diversity
                 maximization algorithms for the Streaming and MapReduce
                 models and analyze their approximation guarantees for
                 the relevant class of metric spaces of bounded doubling
                 dimension. Like other approaches in the literature, our
                 algorithms rely on the determination of high-quality
                 core-sets, i.e., (much) smaller subsets of the input
                 which contain good approximations to the optimal
                 solution for the whole input. For a variety of
                 diversity objective functions, our algorithms attain an
                 $ (\alpha + \epsilon)$-approximation ratio, for any
                 constant $ \epsilon > 0$, where $ \alpha $ is the best
                 approximation ratio achieved by a polynomial-time,
                 linear-space sequential algorithm for the same
                 diversity objective. This improves substantially over
                 the approximation ratios attainable in Streaming and
                 MapReduce by state-of-the-art algorithms for general
                 metric spaces. We provide extensive experimental
                 evidence of the effectiveness of our algorithms on both
                 real world and synthetic datasets, scaling up to over a
                 billion points.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bindschaedler:2017:PDP,
  author =       "Vincent Bindschaedler and Reza Shokri and Carl A.
                 Gunter",
  title =        "Plausible deniability for privacy-preserving data
                 synthesis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "481--492",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Releasing full data records is one of the most
                 challenging problems in data privacy. On the one hand,
                 many of the popular techniques such as data
                 de-identification are problematic because of their
                 dependence on the background knowledge of adversaries.
                 On the other hand, rigorous methods such as the
                 exponential mechanism for differential privacy are
                 often computationally impractical to use for releasing
                 high dimensional data or cannot preserve high utility
                 of original data due to their extensive data
                 perturbation. This paper presents a criterion called
                 plausible deniability that provides a formal privacy
                 guarantee, notably for releasing sensitive datasets: an
                 output record can be released only if a certain amount
                 of input records are indistinguishable, up to a privacy
                 parameter. This notion does not depend on the
                 background knowledge of an adversary. Also, it can
                 efficiently be checked by privacy tests. We present
                 mechanisms to generate synthetic datasets with similar
                 statistical properties to the input data and the same
                 format. We study this technique both theoretically and
                 experimentally. A key theoretical result shows that,
                 with proper randomization, the plausible deniability
                 mechanism generates differentially private synthetic
                 data. We demonstrate the efficiency of this generative
                 technique on a large dataset; it is shown to preserve
                 the utility of original data with respect to various
                 statistical analysis and machine learning measures.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Verma:2017:ECP,
  author =       "Shiv Verma and Luke M. Leslie and Yosub Shin and
                 Indranil Gupta",
  title =        "An experimental comparison of partitioning strategies
                 in distributed graph processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "493--504",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we study the problem of choosing among
                 partitioning strategies in distributed graph processing
                 systems. To this end, we evaluate and characterize both
                 the performance and resource usage of different
                 partitioning strategies under various popular
                 distributed graph processing systems, applications,
                 input graphs, and execution environments. Through our
                 experiments, we found that no single partitioning
                 strategy is the best fit for all situations, and that
                 the choice of partitioning strategy has a significant
                 effect on resource usage and application run-time. Our
                 experiments demonstrate that the choice of partitioning
                 strategy depends on (1) the degree distribution of
                 input graph, (2) the type and duration of the
                 application, and (3) the cluster size. Based on our
                 results, we present rules of thumb to help users pick
                 the best partitioning strategy for their particular use
                 cases. We present results from each system, as well as
                 from all partitioning strategies implemented in one
                 common system (PowerLyra).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2017:SPR,
  author =       "Badrish Chandramouli and Jonathan Goldstein",
  title =        "{Shrink}: prescribing resiliency solutions for
                  streaming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "505--516",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Streaming query deployments make up a vital part of
                 cloud oriented applications. They vary widely in their
                 data, logic, and statefulness, and are typically
                 executed in multi-tenant distributed environments with
                 varying uptime SLAs. In order to achieve these SLAs,
                 one of a number of proposed resiliency strategies is
                 employed to protect against failure. This paper has
                 introduced the first, comprehensive, cloud friendly
                 comparison between different resiliency techniques for
                 streaming queries. In this paper, we introduce models
                 which capture the costs associated with different
                 resiliency strategies, and through a series of
                 experiments which implement and validate these models,
                 show that (1) there is no single resiliency strategy
                 which efficiently handles most streaming scenarios; (2)
                 the optimization space is too complex for a person to
                 employ a ``rules of thumb'' approach; and (3) there
                 exists a clear generalization of periodic checkpointing
                 that is worth considering in many cases. Finally, the
                 models presented in this paper can be adapted to fit a
                 wide variety of resiliency strategies, and likely have
                 important consequences for cloud services beyond those
                 that are obviously streaming.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Barthels:2017:DJA,
  author =       "Claude Barthels and Ingo M{\"u}ller and Timo Schneider
                 and Gustavo Alonso and Torsten Hoefler",
  title =        "Distributed join algorithms on thousands of cores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "517--528",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional database operators such as joins are
                 relevant not only in the context of database engines
                 but also as a building block in many computational and
                 machine learning algorithms. With the advent of big
                 data, there is an increasing demand for efficient join
                 algorithms that can scale with the input data size and
                 the available hardware resources. In this paper, we
                 explore the implementation of distributed join
                 algorithms in systems with several thousand cores
                 connected by a low-latency network as used in high
                 performance computing systems or data centers. We
                 compare radix hash join to sort-merge join algorithms
                 and discuss their implementation at this scale. In the
                 paper, we explain how to use MPI to implement joins,
                 show the impact and advantages of RDMA, discuss the
                 importance of network scheduling, and study the
                 relative performance of sorting vs. hashing. The
                 experimental results show that the algorithms we
                 present scale well with the number of cores, reaching a
                 throughput of 48.7 billion input tuples per second on
                 4,096 cores.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2017:CBS,
  author =       "Junling Liu and Ke Deng and Huanliang Sun and Yu Ge
                 and Xiaofang Zhou and Christian S. Jensen",
  title =        "Clue-based spatio-textual query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "529--540",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Along with the proliferation of online digital map and
                 location-based service, very large POI (point of
                 interest) databases have been constructed where a
                 record corresponds to a POI with information including
                 name, category, address, geographical location and
                 other features. A basic spatial query in POI database
                 is POI retrieval. In many scenarios, a user cannot
                 provide enough information to pinpoint the POI except
                  some clue. For example, a user wants to identify a
                  caf{\'e} in a city visited many years ago. She cannot
                  remember the name and address but she still recalls
                  that ``the caf{\'e} is about 200 meters away from a
                  restaurant; and turning left at the restaurant there is
                  a bakery 500 meters away, etc.''. Intuitively, the
                 clue, even partial and approximate, describes the
                 spatio-textual context around the targeted POI.
                 Motivated by this observation, this work investigates
                 clue-based spatio-textual query which allows user
                 providing clue, i.e., some nearby POIs and the spatial
                 relationships between them, in POI retrieval. The
                 objective is to retrieve k POIs from a POI database
                 with the highest spatio-textual context similarities
                 against the clue. This work has deliberately designed
                 data-quality-tolerant spatio-textual context similarity
                 metric to cope with various data quality problems in
                 both the clue and the POI database. Through crossing
                 valuation, the query accuracy is further enhanced by
                 ensemble method. Also, this work has developed an index
                 called roll-out-star R-tree (RSR-tree) to dramatically
                 improve the query processing efficiency. The extensive
                 tests on data sets from the real world have verified
                 the superiority of our methods in all aspects.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zheng:2017:TIC,
  author =       "Yudian Zheng and Guoliang Li and Yuanbing Li and
                 Caihua Shan and Reynold Cheng",
  title =        "Truth inference in crowdsourcing: is the problem
                 solved?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "541--552",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Crowdsourcing has emerged as a novel problem-solving
                 paradigm, which facilitates addressing problems that
                 are hard for computers, e.g., entity resolution and
                 sentiment analysis. However, due to the openness of
                 crowdsourcing, workers may yield low-quality answers,
                 and a redundancy-based method is widely employed, which
                 first assigns each task to multiple workers and then
                 infers the correct answer (called truth) for the task
                 based on the answers of the assigned workers. A
                 fundamental problem in this method is Truth Inference,
                 which decides how to effectively infer the truth.
                 Recently, the database community and data mining
                 community independently study this problem and propose
                 various algorithms. However, these algorithms are not
                 compared extensively under the same framework and it is
                 hard for practitioners to select appropriate
                 algorithms. To alleviate this problem, we provide a
                 detailed survey on 17 existing algorithms and perform a
                 comprehensive evaluation using 5 real datasets. We make
                 all codes and datasets public for future research.
                 Through experiments we find that existing algorithms
                 are not stable across different datasets and there is
                 no algorithm that outperforms others consistently. We
                 believe that the truth inference problem is not fully
                 solved, and identify the limitations of existing
                 algorithms and point out promising research
                 directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Harding:2017:EDC,
  author =       "Rachael Harding and Dana {Van Aken} and Andrew Pavlo
                 and Michael Stonebraker",
  title =        "An evaluation of distributed concurrency control",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "553--564",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Increasing transaction volumes have led to a
                 resurgence of interest in distributed transaction
                 processing. In particular, partitioning data across
                 several servers can improve throughput by allowing
                 servers to process transactions in parallel. But
                 executing transactions across servers limits the
                 scalability and performance of these systems. In this
                 paper, we quantify the effects of distribution on
                 concurrency control protocols in a distributed
                 environment. We evaluate six classic and modern
                 protocols in an in-memory distributed database
                 evaluation framework called Deneva, providing an
                 apples-to-apples comparison between each. Our results
                 expose severe limitations of distributed transaction
                 processing engines. Moreover, in our analysis, we
                 identify several protocol-specific scalability
                 bottlenecks. We conclude that to achieve truly scalable
                 operation, distributed concurrency control solutions
                 must seek a tighter coupling with either novel network
                 hardware (in the local area) or applications (via data
                 modeling and semantically-aware execution), or both.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cui:2017:KLQ,
  author =       "Wanyun Cui and Yanghua Xiao and Haixun Wang and
                 Yangqiu Song and Seung-won Hwang and Wei Wang",
  title =        "{KBQA}: learning question answering over {QA} corpora
                 and knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "565--576",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Question answering (QA) has become a popular way for
                 humans to access billion-scale knowledge bases. Unlike
                 web search, QA over a knowledge base gives out accurate
                 and concise results, provided that natural language
                 questions can be understood and mapped precisely to
                 structured queries over the knowledge base. The
                 challenge, however, is that a human can ask one
                 question in many different ways. Previous approaches
                 have natural limits due to their representations: rule
                 based approaches only understand a small set of
                 ``canned'' questions, while keyword based or synonym
                 based approaches cannot fully understand the questions.
                 In this paper, we design a new kind of question
                 representation: templates, over a billion scale
                 knowledge base and a million scale QA corpora. For
                 example, for questions about a city's population, we
                 learn templates such as What's the population of city?,
                 How many people are there in city?. We learned 27
                 million templates for 2782 intents. Based on these
                 templates, our QA system KBQA effectively supports
                 binary factoid questions, as well as complex questions
                 which are composed of a series of binary factoid
                 questions. Furthermore, we expand predicates in RDF
                 knowledge base, which boosts the coverage of knowledge
                 base by 57 times. Our QA system beats all other
                 state-of-art works on both effectiveness and efficiency
                 over QALD benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2017:PNL,
  author =       "Daniel Deutch and Nave Frost and Amir Gilad",
  title =        "Provenance for natural language queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "577--588",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multiple lines of research have developed Natural
                 Language (NL) interfaces for formulating database
                 queries. We build upon this work, but focus on
                 presenting a highly detailed form of the answers in NL.
                 The answers that we present are importantly based on
                 the provenance of tuples in the query result, detailing
                 not only the results but also their explanations. We
                 develop a novel method for transforming provenance
                 information to NL, by leveraging the original NL query
                 structure. Furthermore, since provenance information is
                 typically large and complex, we present two solutions
                 for its effective presentation as NL text: one that is
                 based on provenance factorization, with novel
                 desiderata relevant to the NL case, and one that is
                 based on summarization. We have implemented our
                 solution in an end-to-end system supporting questions,
                 answers and provenance, all expressed in NL. Our
                 experiments, including a user study, indicate the
                 quality of our solution and its scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2017:AAP,
  author =       "Yi Lu and Anil Shanbhag and Alekh Jindal and Samuel
                 Madden",
  title =        "{AdaptDB}: adaptive partitioning for distributed
                 joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "589--600",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big data analytics often involves complex join queries
                 over two or more tables. Such join processing is
                 expensive in a distributed setting both because large
                 amounts of data must be read from disk, and because of
                 data shuffling across the network. Many techniques
                 based on data partitioning have been proposed to reduce
                 the amount of data that must be accessed, often
                 focusing on finding the best partitioning scheme for a
                 particular workload, rather than adapting to changes in
                 the workload over time. In this paper, we present
                 AdaptDB, an adaptive storage manager for analytical
                 database workloads in a distributed setting. It works
                 by partitioning datasets across a cluster and
                 incrementally refining data partitioning as queries are
                 run. AdaptDB introduces a novel hyper-join that avoids
                 expensive data shuffling by identifying storage blocks
                 of the joining tables that overlap on the join
                 attribute, and only joining those blocks. Hyper-join
                 performs well when each block in one table overlaps
                 with few blocks in the other table, since that will
                 minimize the number of blocks that have to be accessed.
                 To minimize the number of overlapping blocks for common
                  join queries, AdaptDB uses smooth repartitioning to
                 repartition small portions of the tables on join
                 attributes as queries run. A prototype of AdaptDB
                 running on top of Spark improves query performance by
                 2--3x on TPC-H as well as real-world dataset, versus a
                 system that employs scans and shuffle-joins.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2017:EES,
  author =       "Zhipeng Zhang and Yingxia Shao and Bin Cui and Ce
                 Zhang",
  title =        "An experimental evaluation of {SimRank}-based
                 similarity search algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "601--612",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a graph, SimRank is one of the most popular
                 measures of the similarity between two vertices. We
                 focus on efficiently calculating SimRank, which has
                 been studied intensively over the last decade. This has
                 led to many algorithms that efficiently calculate or
                 approximate SimRank being proposed by researchers.
                 Despite these abundant research efforts, there is no
                 systematic comparison of these algorithms. In this
                 paper, we conduct a study to compare these algorithms
                 to understand their pros and cons. We first introduce a
                 taxonomy for different algorithms that calculate
                 SimRank and classify each algorithm into one of the
                 following three classes, namely, iterative-,
                 non-iterative-, and random walk-based method. We
                 implement ten algorithms published from 2002 to 2015,
                 and compare them using synthetic and real-world graphs.
                 To ensure the fairness of our study, our
                 implementations use the same data structure and
                 execution framework, and we try our best to optimize
                 each of these algorithms. Our study reveals that none
                 of these algorithms dominates the others: algorithms
                 based on iterative method often have higher accuracy
                 while algorithms based on random walk can be more
                 scalable. One noniterative algorithm has good
                 effectiveness and efficiency on graphs with medium
                 size. Thus, depending on the requirements of different
                 applications, the optimal choice of algorithms differs.
                 This paper provides an empirical guideline for making
                 such choices.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Faleiro:2017:HPT,
  author =       "Jose M. Faleiro and Daniel J. Abadi and Joseph M.
                 Hellerstein",
  title =        "High performance transactions via early write
                 visibility",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "613--624",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In order to guarantee recoverable transaction
                 execution, database systems permit a transaction's
                 writes to be observable only at the end of its
                 execution. As a consequence, there is generally a delay
                 between the time a transaction performs a write and the
                 time later transactions are permitted to read it. This
                 delayed write visibility can significantly impact the
                 performance of serializable database systems by
                 reducing concurrency among conflicting transactions.
                 This paper makes the observation that delayed write
                 visibility stems from the fact that database systems
                 can arbitrarily abort transactions at any point during
                 their execution. Accordingly, we make the case for
                 database systems which only abort transactions under a
                 restricted set of conditions, thereby enabling a new
                 recoverability mechanism, early write visibility, which
                 safely makes transactions' writes visible prior to the
                 end of their execution. We design a new serializable
                 concurrency control protocol, piece-wise visibility
                 (PWV), with the explicit goal of enabling early write
                 visibility. We evaluate PWV against state-of-the-art
                 serializable protocols and a highly optimized
                 implementation of read committed, and find that PWV can
                 outperform serializable protocols by an order of
                 magnitude and read committed by 3X on high contention
                 workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eswaran:2017:ZBP,
  author =       "Dhivya Eswaran and Stephan G{\"u}nnemann and Christos
                 Faloutsos and Disha Makhija and Mohit Kumar",
  title =        "{ZooBP}: belief propagation for heterogeneous
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "5",
  pages =        "625--636",
  month =        jan,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a heterogeneous network, with nodes of different
                 types --- e.g., products, users and sellers from an
                 online recommendation site like Amazon --- and labels
                 for a few nodes ('honest', 'suspicious', etc), can we
                 find a closed formula for Belief Propagation (BP),
                 exact or approximate? Can we say whether it will
                 converge? BP, traditionally an inference algorithm for
                 graphical models, exploits so-called ``network
                 effects'' to perform graph classification tasks when
                 labels for a subset of nodes are provided; and it has
                 been successful in numerous settings like fraudulent
                 entity detection in online retailers and classification
                 in social networks. However, it does not have a
                 closed-form nor does it provide convergence guarantees
                 in general. We propose ZooBP, a method to perform fast
                 BP on undirected heterogeneous graphs with provable
                 convergence guarantees. ZooBP has the following
                 advantages: (1) Generality: It works on heterogeneous
                 graphs with multiple types of nodes and edges; (2)
                 Closed-form solution: ZooBP gives a closed-form
                 solution as well as convergence guarantees; (3)
                 Scalability: ZooBP is linear on the graph size and is
                 up to 600$ \times $ faster than BP, running on graphs
                 with 3.3 million edges in a few seconds. (4)
                 Effectiveness: Applied on real data (a Flipkart
                 e-commerce network with users, products and sellers),
                 ZooBP identifies fraudulent users with a near-perfect
                 precision of 92.3 \% over the top 300 results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lyu:2017:USV,
  author =       "Min Lyu and Dong Su and Ninghui Li",
  title =        "Understanding the sparse vector technique for
                 differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "6",
  pages =        "637--648",
  month =        feb,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Sparse Vector Technique (SVT) is a fundamental
                 technique for satisfying differential privacy and has
                 the unique quality that one can output some query
                 answers without apparently paying any privacy cost. SVT
                 has been used in both the interactive setting, where
                 one tries to answer a sequence of queries that are not
                 known ahead of the time, and in the non-interactive
                 setting, where all queries are known. Because of the
                 potential savings on privacy budget, many variants for
                 SVT have been proposed and employed in
                 privacy-preserving data mining and publishing. However,
                 most variants of SVT are actually not private. In this
                 paper, we analyze these errors and identify the
                 misunderstandings that likely contribute to them. We
                 also propose a new version of SVT that provides better
                 utility, and introduce an effective technique to
                 improve the performance of SVT. These enhancements can
                 be applied to improve utility in the interactive
                 setting. Through both analytical and experimental
                 comparisons, we show that, in the non-interactive
                 setting (but not the interactive setting), the SVT
                 technique is unnecessary, as it can be replaced by the
                 Exponential Mechanism (EM) with better accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2017:OEA,
  author =       "Fan Zhang and Wenjie Zhang and Ying Zhang and Lu Qin
                 and Xuemin Lin",
  title =        "{OLAK}: an efficient algorithm to prevent unraveling
                 in social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "6",
  pages =        "649--660",
  month =        feb,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we study the problem of the anchored
                 $k$-core. Given a graph $G$, an integer $k$ and a
                 budget $b$, we aim to identify $b$ vertices in $G$ so
                 that we can determine the largest induced subgraph $J$
                 in which every vertex, except the $b$ vertices, has at
                 least $k$ neighbors in $J$. This problem was introduced
                 by Bhawalkar and Kleinberg et al. in the context of
                 user engagement in social networks, where a user may
                 leave a community if he/she has less than $k$ friends
                 engaged. The problem has been shown to be NP-hard and
                 inapproximable. A polynomial-time algorithm for graphs
                 with bounded tree-width has been proposed. However,
                 this assumption usually does not hold in real-life
                 graphs, and their techniques cannot be extended to
                 handle general graphs. Motivated by this, we propose an
                 efficient algorithm, namely onion-layer based anchored
                  $k$-core (OLAK), for the anchored $k$-core problem on
                 large scale graphs. To facilitate computation of the
                 anchored $k$-core, we design an onion layer structure,
                 which is generated by a simple onion-peeling-like
                 algorithm against a small set of vertices in the graph.
                 We show that computation of the best anchor can simply
                 be conducted upon the vertices on the onion layers,
                 which significantly reduces the search space. Based on
                 the well-organized layer structure, we develop
                 efficient candidates exploration, early termination and
                 pruning techniques to further speed up computation.
                 Comprehensive experiments on 10 real-life graphs
                 demonstrate the effectiveness and efficiency of our
                 proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khan:2017:DTI,
  author =       "Meraj Khan and Larry Xu and Arnab Nandi and Joseph M.
                 Hellerstein",
  title =        "Data tweening: incremental visualization of data
                 transforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "6",
  pages =        "661--672",
  month =        feb,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the context of interactive query sessions, it is
                 common to issue a succession of queries, transforming a
                 dataset to the desired result. It is often difficult to
                 comprehend a succession of transformations, especially
                 for complex queries. Thus, to facilitate understanding
                 of each data transformation and to provide continuous
                 feedback, we introduce the concept of ``data
                 tweening'', i.e., interpolating between resultsets,
                 presenting to the user a series of incremental visual
                 representations of a resultset transformation. We
                 present tweening methods that consider not just the
                 changes in the result, but also the changes in the
                 query. Through user studies, we show that data tweening
                 allows users to efficiently comprehend data transforms,
                 and also enables them to gain a better understanding of
                 the underlying query operations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bater:2017:SSQ,
  author =       "Johes Bater and Gregory Elliott and Craig Eggen and
                 Satyender Goel and Abel Kho and Jennie Rogers",
  title =        "{SMCQL}: secure querying for federated databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "6",
  pages =        "673--684",
  month =        feb,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "People and machines are collecting data at an
                 unprecedented rate. Despite this newfound abundance of
                 data, progress has been slow in sharing it for open
                 science, business, and other data-intensive endeavors.
                 Many such efforts are stymied by privacy concerns and
                 regulatory compliance issues. For example, many
                 hospitals are interested in pooling their medical
                 records for research, but none may disclose arbitrary
                 patient records to researchers or other healthcare
                 providers. In this context we propose the Private Data
                 Network (PDN), a federated database for querying over
                 the collective data of mutually distrustful parties. In
                 a PDN, each member database does not reveal its tuples
                 to its peers nor to the query writer. Instead, the user
                 submits a query to an honest broker that plans and
                 coordinates its execution over multiple private
                 databases using secure multiparty computation (SMC).
                 Here, each database's query execution is oblivious, and
                 its program counters and memory traces are agnostic to
                 the inputs of others. We introduce a framework for
                 executing PDN queries named smcql. This system
                 translates SQL statements into SMC primitives to
                 compute query results over the union of its source
                 databases without revealing sensitive information about
                 individual tuples to peer data providers or the honest
                 broker. Only the honest broker and the querier receive
                 the results of a PDN query. For fast, secure query
                 evaluation, we explore a heuristics-driven optimizer
                 that minimizes the PDN's use of secure computation and
                 partitions its query evaluation into scalable slices.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zamanian:2017:EMD,
  author =       "Erfan Zamanian and Carsten Binnig and Tim Harris and
                 Tim Kraska",
  title =        "The end of a myth: distributed transactions can
                 scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "6",
  pages =        "685--696",
  month =        feb,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The common wisdom is that distributed transactions do
                 not scale. But what if distributed transactions could
                 be made scalable using the next generation of networks
                 and a redesign of distributed databases? There would no
                 longer be a need for developers to worry about
                 co-partitioning schemes to achieve decent performance.
                 Application development would become easier as data
                 placement would no longer determine how scalable an
                 application is. Hardware provisioning would be
                 simplified as the system administrator can expect a
                 linear scale-out when adding more machines rather than
                 some complex sub-linear function, which is highly
                 application specific. In this paper, we present the
                 design of our novel scalable database system NAM-DB and
                 show that distributed transactions with the very common
                 Snapshot Isolation guarantee can indeed scale using the
                 next generation of RDMA-enabled network technology
                 without any inherent bottlenecks. Our experiments with
                 the TPC-C benchmark show that our system scales
                 linearly to over 6.5 million new-order (14.5 million
                 total) distributed transactions per second on 56
                 machines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2017:NIG,
  author =       "Haohan Zhu and Xianrui Meng and George Kollios",
  title =        "{NED}: an inter-graph node metric based on edit
                 distance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "6",
  pages =        "697--708",
  month =        feb,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Node similarity is fundamental in graph analytics.
                 However, node similarity between nodes in different
                 graphs (inter-graph nodes) has not received enough
                 attention yet. The inter-graph node similarity is
                 important in learning a new graph based on the
                 knowledge extracted from an existing graph (transfer
                 learning on graphs) and has applications in biological,
                 communication, and social networks. In this paper, we
                 propose a novel distance function for measuring
                  inter-graph node similarity with edit distance,
                  called NED. In NED, two nodes are compared
                 according to their local neighborhood topologies which
                  are represented as unordered $k$-adjacent trees, without
                 relying on any extra information. Due to the hardness
                 of computing tree edit distance on unordered trees
                 which is NP-Complete, we propose a modified tree edit
                 distance, called TED*, for comparing unordered and
                  unlabeled $k$-adjacent trees. TED* is a metric distance,
                 as the original tree edit distance, but more
                 importantly, TED* is polynomially computable. As a
                 metric distance, NED admits efficient indexing,
                 provides interpretable results, and shows to perform
                 better than existing approaches on a number of data
                 analysis tasks, including graph deanonymization.
                 Finally, the efficiency and effectiveness of NED are
                 empirically demonstrated using real-world graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2017:ECS,
  author =       "Yixiang Fang and Reynold Cheng and Xiaodong Li and
                 Siqiang Luo and Jiafeng Hu",
  title =        "Effective community search over large spatial graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "6",
  pages =        "709--720",
  month =        feb,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 25 09:01:51 MST 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Communities are prevalent in social networks,
                 knowledge graphs, and biological networks. Recently,
                 the topic of community search (CS) has received plenty
                 of attention. Given a query vertex, CS looks for a
                 dense subgraph that contains it. Existing CS solutions
                 do not consider the spatial extent of a community. They
                 can yield communities whose locations of vertices span
                 large areas. In applications that facilitate the
                 creation of social events (e.g., finding conference
                 attendees to join a dinner), it is important to find
                 groups of people who are physically close to each
                 other. In this situation, it is desirable to have a
                 spatial-aware community (or SAC), whose vertices are
                  close structurally and spatially. Given a graph $G$
                  and a query vertex $q$, we develop exact solutions
                  for finding an SAC that contains $q$. Since these
                  solutions cannot
                 scale to large datasets, we have further designed three
                 approximation algorithms to compute an SAC. We have
                 performed an experimental evaluation for these
                 solutions on both large real and synthetic datasets.
                 Experimental results show that SAC is better than the
                 communities returned by existing solutions. Moreover,
                 our approximation solutions can find SACs accurately
                 and efficiently.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Szlichta:2017:ECD,
  author =       "Jaros{\l}aw Szlichta and Parke Godfrey and Lukasz
                 Golab and Mehdi Kargar and Divesh Srivastava",
  title =        "Effective and complete discovery of order dependencies
                 via set-based axiomatization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "721--732",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Integrity constraints (ICs) are useful for query
                 optimization and for expressing and enforcing
                 application semantics. However, formulating constraints
                 manually requires domain expertise, is prone to human
                 errors, and may be excessively time consuming,
                 especially on large datasets. Hence, proposals for
                 automatic discovery have been made for some classes of
                 ICs, such as functional dependencies (FDs), and
                 recently, order dependencies (ODs). ODs properly
                 subsume FDs, as they can additionally express business
                 rules involving order; e.g., an employee never has a
                 higher salary while paying lower taxes than another
                 employee. We present a new OD discovery algorithm
                 enabled by a novel polynomial mapping to a canonical
                 form of ODs, and a sound and complete set of axioms
                 (inference rules) for canonical ODs. Our algorithm has
                  exponential worst-case time complexity, $O(2^{|R|})$,
                  in the number of attributes $|R|$ and linear
                 complexity in the number of tuples. We prove that it
                 produces a complete and minimal set of ODs. Using real
                 and synthetic datasets, we experimentally show
                 orders-of-magnitude performance improvements over the
                 prior state-of-the-art.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Karnagel:2017:AWP,
  author =       "Tomas Karnagel and Dirk Habich and Wolfgang Lehner",
  title =        "Adaptive work placement for query processing on
                 heterogeneous computing resources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "733--744",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The hardware landscape is currently changing from
                 homogeneous multi-core systems towards heterogeneous
                 systems with many different computing units, each with
                 their own characteristics. This trend is a great
                  opportunity for database systems to increase the
                 overall performance if the heterogeneous resources can
                 be utilized efficiently. To achieve this, the main
                 challenge is to place the right work on the right
                 computing unit. Current approaches tackling this
                 placement for query processing assume that data
                 cardinalities of intermediate results can be correctly
                 estimated. However, this assumption does not hold for
                 complex queries. To overcome this problem, we propose
                 an adaptive placement approach being independent of
                 cardinality estimation of intermediate results. Our
                 approach is incorporated in a novel adaptive placement
                 sequence. Additionally, we implement our approach as an
                 extensible virtualization layer, to demonstrate the
                 broad applicability with multiple database systems. In
                 our evaluation, we clearly show that our approach
                 significantly improves OLAP query processing on
                 heterogeneous hardware, while being adaptive enough to
                 react to changing cardinalities of intermediate query
                 results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2017:LFE,
  author =       "Fan Yang and Fanhua Shang and Yuzhen Huang and James
                 Cheng and Jinfeng Li and Yunjian Zhao and Ruihao Zhao",
  title =        "{LFTF}: a framework for efficient tensor analytics at
                 scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "745--756",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tensors are higher order generalizations of matrices
                 to model multi-aspect data, e.g., a set of purchase
                 records with the schema (user\_id, product\_id,
                 timestamp, feedback). Tensor factorization is a
                 powerful technique for generating a model from a
                 tensor, just like matrix factorization generates a
                 model from a matrix, but with higher accuracy and
                 richer information as more attributes are available in
                  a higher-order tensor than a matrix. The data model
                 obtained by tensor factorization can be used for
                 classification, recommendation, anomaly detection, and
                 so on. Though having a broad range of applications,
                 tensor factorization has not been popularly applied
                 compared with matrix factorization that has been widely
                 used in recommender systems, mainly due to the high
                 computational cost and poor scalability of existing
                 tensor factorization methods. Efficient and scalable
                 tensor factorization is particularly challenging
                 because real world tensor data are mostly sparse and
                 massive. In this paper, we propose a novel distributed
                 algorithm, called Lock-Free Tensor Factorization
                 (LFTF), which significantly improves the efficiency and
                 scalability of distributed tensor factorization by
                 exploiting asynchronous execution in a re-formulated
                 problem. Our experiments show that LFTF achieves much
                 higher CPU and network throughput than existing
                 methods, converges at least 17 times faster and scales
                 to much larger datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gupta:2017:LSM,
  author =       "Shalmoli Gupta and Ravi Kumar and Kefu Lu and Benjamin
                 Moseley and Sergei Vassilvitskii",
  title =        "Local search methods for $k$-means with outliers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "757--768",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of $k$-means clustering in the
                 presence of outliers. The goal is to cluster a set of
                 data points to minimize the variance of the points
                 assigned to the same cluster, with the freedom of
                 ignoring a small set of data points that can be labeled
                 as outliers. Clustering with outliers has received a
                 lot of attention in the data processing community, but
                 practical, efficient, and provably good algorithms
                  remain unknown for the most popular $k$-means objective.
                 Our work proposes a simple local search-based algorithm
                  for $k$-means clustering with outliers. We prove that
                 this algorithm achieves constant-factor approximate
                 solutions and can be combined with known sketching
                 techniques to scale to large data sets. Using empirical
                 evaluation on both synthetic and large-scale real-world
                 data, we demonstrate that the algorithm dominates
                 recently proposed heuristic approaches for the
                 problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Casanova:2017:DTR,
  author =       "Guillaume Casanova and Elias Englmeier and Michael E.
                 Houle and Peer Kr{\"o}ger and Michael Nett and Erich
                 Schubert and Arthur Zimek",
  title =        "Dimensional testing for reverse $k$-nearest neighbor
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "769--780",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a query object $q$, reverse $k$-nearest
                  neighbor (R$k$NN) search aims to locate those objects
                  of the database that have $q$ among their $k$-nearest
                  neighbors. In this paper, we propose an approximation
                  method for solving R$k$NN queries, where the pruning
                  operations
                 and termination tests are guided by a characterization
                 of the intrinsic dimensionality of the data. The method
                 can accommodate any index structure supporting
                 incremental (forward) nearest-neighbor search for the
                 generation and verification of candidates, while
                 avoiding impractically-high preprocessing costs. We
                 also provide experimental evidence that our method
                 significantly outperforms its competitors in terms of
                 the tradeoff between execution time and the quality of
                 the approximation. Our approach thus addresses many of
                 the scalability issues surrounding the use of previous
                 methods in data mining.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2017:EEM,
  author =       "Yingjun Wu and Joy Arulraj and Jiexi Lin and Ran Xian
                 and Andrew Pavlo",
  title =        "An empirical evaluation of in-memory multi-version
                 concurrency control",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "781--792",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multi-version concurrency control (MVCC) is currently
                 the most popular transaction management scheme in
                 modern database management systems (DBMSs). Although
                 MVCC was discovered in the late 1970s, it is used in
                 almost every major relational DBMS released in the last
                 decade. Maintaining multiple versions of data
                 potentially increases parallelism without sacrificing
                 serializability when processing transactions. But
                 scaling MVCC in a multi-core and in-memory setting is
                 non-trivial: when there are a large number of threads
                 running in parallel, the synchronization overhead can
                 outweigh the benefits of multi-versioning. To
                 understand how MVCC perform when processing
                 transactions in modern hardware settings, we conduct an
                 extensive study of the scheme's four key design
                 decisions: concurrency control protocol, version
                 storage, garbage collection, and index management. We
                 implemented state-of-the-art variants of all of these
                 in an in-memory DBMS and evaluated them using OLTP
                 workloads. Our analysis identifies the fundamental
                 bottlenecks of each design choice.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2017:FDH,
  author =       "You Wu and Junyang Gao and Pankaj K. Agarwal and Jun
                 Yang",
  title =        "Finding diverse, high-value representatives on a
                 surface of answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "793--804",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In many applications, the system needs to selectively
                 present a small subset of answers to users. The set of
                 all possible answers can be seen as an elevation
                 surface over a domain, where the elevation measures the
                 quality of each answer, and the dimensions of the
                 domain correspond to attributes of the answers with
                 which similarity between answers can be measured. This
                 paper considers the problem of finding a diverse set of
                  $k$ high-quality representatives for such a surface.
                  We show that existing methods for diversified top-$k$
                  and weighted clustering problems are inadequate for
                  this problem. We propose $k$-DHR as a better
                  formulation for the problem. We show that $k$-DHR has
                  a submodular and monotone objective function, and we
                  develop efficient algorithms for solving $k$-DHR with
                  provable guarantees. We conduct extensive experiments
                  to demonstrate the usefulness of the results produced
                  by $k$-DHR for
                 applications in computational lead-finding and
                 fact-checking, as well as the efficiency and
                 effectiveness of our algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2017:RTI,
  author =       "Yanhao Wang and Qi Fan and Yuchen Li and Kian-Lee
                 Tan",
  title =        "Real-time influence maximization on dynamic social
                 streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "805--816",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Influence maximization (IM), which selects a set of
                 $k$ users (called seeds) to maximize the influence
                 spread over a social network, is a fundamental problem
                 in a wide range of applications such as viral marketing
                 and network monitoring. Existing IM solutions fail to
                 consider the highly dynamic nature of social influence,
                 which results in either poor seed qualities or long
                 processing time when the network evolves. To address
                 this problem, we define a novel IM query named Stream
                 Influence Maximization (SIM) on social streams.
                 Technically, SIM adopts the sliding window model and
                 maintains a set of $k$ seeds with the largest influence
                 value over the most recent social actions. Next, we
                 propose the Influential Checkpoints (IC) framework to
                 facilitate continuous SIM query processing. The IC
                 framework creates a checkpoint for each window shift
                 and ensures an $ \epsilon $-approximate solution. To
                 improve its efficiency, we further devise a Sparse
                 Influential Checkpoints (SIC) framework which
                 selectively keeps $ O(\log N / \beta)$ checkpoints for
                 a sliding window of size $N$ and maintains an $
                 \epsilon (1 - \beta) / 2$-approximate solution.
                 Experimental results on both real-world and synthetic
                 datasets confirm the effectiveness and efficiency of
                 our proposed frameworks against the state-of-the-art IM
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cai:2017:CDC,
  author =       "Hongyun Cai and Vincent W. Zheng and Fanwei Zhu and
                 Kevin Chen-Chuan Chang and Zi Huang",
  title =        "From community detection to community profiling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "817--828",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Most existing community-related studies focus on
                 detection, which aim to find the community membership
                 for each user from user friendship links. However,
                 membership alone, without a complete profile of what a
                 community is and how it interacts with other
                 communities, has limited applications. This motivates
                 us to consider systematically profiling the communities
                 and thereby developing useful community-level
                 applications. In this paper, we for the first time
                 formalize the concept of community profiling. With rich
                 user information on the network, such as user published
                 content and user diffusion links, we characterize a
                 community in terms of both its internal content profile
                 and external diffusion profile. The difficulty of
                 community profiling is often underestimated. We novelly
                 identify three unique challenges and propose a joint
                 Community Profiling and Detection (CPD) model to
                 address them accordingly. We also contribute a scalable
                 inference algorithm, which scales linearly with the
                 data size and it is easily parallelizable. We evaluate
                 CPD on large-scale real-world data sets, and show that
                 it is significantly better than the state-of-the-art
                 baselines in various tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jain:2017:UWD,
  author =       "Ayush Jain and Akash Das Sarma and Aditya Parameswaran
                 and Jennifer Widom",
  title =        "Understanding workers, developing effective tasks, and
                 enhancing marketplace dynamics: a study of a large
                 crowdsourcing marketplace",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "829--840",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We conduct an experimental analysis of a dataset
                 comprising over 27 million microtasks performed by over
                 70,000 workers issued to a large crowdsourcing
                 marketplace between 2012--2016. Using this data---never
                 before analyzed in an academic context---we shed light
                 on three crucial aspects of crowdsourcing: (1) Task
                 design---helping requesters understand what constitutes
                 an effective task, and how to go about designing one;
                 (2) Marketplace dynamics --- helping marketplace
                 administrators and designers understand the interaction
                 between tasks and workers, and the corresponding
                 marketplace load; and (3) Worker behavior ---
                 understanding worker attention spans, lifetimes, and
                 general behavior, for the improvement of the
                 crowdsourcing ecosystem as a whole.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lin:2017:OPE,
  author =       "Xuelian Lin and Shuai Ma and Han Zhang and Tianyu Wo
                 and Jinpeng Huai",
  title =        "One-pass error bounded trajectory simplification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "7",
  pages =        "841--852",
  month =        mar,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Mar 27 20:45:15 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nowadays, various sensors are collecting, storing and
                 transmitting tremendous trajectory data, and it is
                 known that raw trajectory data seriously wastes the
                 storage, network band and computing resource. Line
                 simplification (LS) algorithms are an effective
                 approach to attacking this issue by compressing data
                 points in a trajectory to a set of continuous line
                 segments, and are commonly used in practice. However,
                 existing LS algorithms are not sufficient for the needs
                 of sensors in mobile devices. In this study, we first
                 develop a one-pass error bounded trajectory
                 simplification algorithm (OPERB), which scans each data
                 point in a trajectory once and only once. We then
                 propose an aggressive one-pass error bounded trajectory
                 simplification algorithm (OPERB-A), which allows
                 interpolating new data points into a trajectory under
                 certain conditions. Finally, we experimentally verify
                 that our approaches (OPERB and OPERB-A) are both
                 efficient and effective, using four real-life
                 trajectory datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2017:MIL,
  author =       "Jianguo Wang and Chunbin Lin and Ruining He and Moojin
                 Chae and Yannis Papakonstantinou and Steven Swanson",
  title =        "{MILC}: inverted list compression in memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "8",
  pages =        "853--864",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3090163.3090164",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Inverted list compression is a topic that has been
                 studied for 50 years due to its fundamental importance
                 in numerous applications including information
                 retrieval, databases, and graph analytics. Typically,
                 an inverted list compression algorithm is evaluated on
                 its space overhead and query processing time. Earlier
                 list compression designs mainly focused on minimizing
                 the space overhead to reduce expensive disk I/O time in
                 disk-oriented systems. But the recent trend is shifted
                 towards reducing query processing time because the
                 underlying systems tend to be memory-resident. Although
                 there are many highly optimized compression approaches
                 in main memory, there is still a considerable
                 performance gap between query processing over
                 compressed lists and uncompressed lists, which
                 motivates this work. In this work, we set out to bridge
                 this performance gap for the first time by proposing a
                 new compression scheme, namely, MILC (memory inverted
                 list compression). MILC relies on a series of
                 techniques including offset-oriented fixed-bit
                 encoding, dynamic partitioning, in-block compression,
                 cache-aware optimization, and SIMD acceleration. We
                 conduct experiments on three real-world datasets in
                 information retrieval, databases, and graph analytics
                 to demonstrate the high performance and low space
                 overhead of MILC. We compare MILC with 12 recent
                 compression algorithms and experimentally show that
                 MILC improves the query performance by up to 13.2$
                 \times $ and reduces the space overhead by up to 4.7$
                 \times $.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2017:CDD,
  author =       "Botong Huang and Jun Yang",
  title =        "{C{\"u}m{\"u}l{\"o}n--D}: data analytics in a dynamic
                 spot market",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "8",
  pages =        "865--876",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3090163.3090165",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a system called C{\"u}m{\"u}l{\"o}n-D for
                 matrix-based data analysis in a spot market of a public
                 cloud. Prices in such markets fluctuate over time:
                 while users can acquire machines usually at a very low
                 bid price, the cloud can terminate these machines as
                 soon as the market price exceeds their bid price. The
                 distinguishing features of C{\"u}m{\"u}l{\"o}n-D
                 include its continuous, proactive adaptation to the
                 changing market, and its ability to quantify and
                 control the monetary risk involved in paying for a
                 workflow execution. We solve the dynamic optimization
                 problem in a principled manner with a Markov decision
                 process, and account for practical details that are
                 often ignored previously but nonetheless important to
                 performance. We evaluate C{\"u}m{\"u}l{\"o}n-D's
                 effectiveness and advantages over previous approaches
                 with experiments on Amazon EC2.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Then:2017:AAT,
  author =       "Manuel Then and Timo Kersten and Stephan G{\"u}nnemann
                 and Alfons Kemper and Thomas Neumann",
  title =        "Automatic algorithm transformation for efficient
                 multi-snapshot analytics on temporal graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "8",
  pages =        "877--888",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3090163.3090166",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analytical graph algorithms commonly compute metrics
                 for a graph at one point in time. In practice it is
                 often also of interest how metrics change over time,
                 e.g., to find trends. For this purpose, algorithms must
                 be executed for multiple graph snapshots. We present
                 Single Algorithm Multiple Snapshots (SAMS), a novel
                 approach to execute algorithms concurrently for
                 multiple graph snapshots. SAMS automatically transforms
                 graph algorithms to leverage similarities between the
                 analyzed graph snapshots. The automatic transformation
                 interleaves algorithm executions on multiple snapshots,
                 synergistically shares their graph accesses and
                 traversals, and optimizes the algorithm's data layout.
                 Thus, SAMS can amortize the cost of random data
                 accesses and improve memory bandwidth utilization---two
                 main cost factors in graph analytics. We extensively
                 evaluate SAMS using six well-known algorithms and
                 multiple synthetic as well as real-world graph
                 datasets. Our measurements show that in multi-snapshot
                 analyses, SAMS offers runtime improvements of up to two
                 orders of magnitude over traditional snapshot-at-a-time
                 execution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2017:LAM,
  author =       "Jianqiao Zhu and Navneet Potti and Saket Saurabh and
                 Jignesh M. Patel",
  title =        "Looking ahead makes query plans robust: making the
                 initial case with in-memory star schema data warehouse
                 workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "8",
  pages =        "889--900",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3090163.3090167",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query optimizers and query execution engines cooperate
                 to deliver high performance on complex analytic
                 queries. Typically, the optimizer searches through the
                 plan space and sends a selected plan to the execution
                 engine. However, optimizers may at times miss the
                 optimal plan, with sometimes disastrous impact on
                 performance. In this paper, we develop the notion of
                 robustness of a query evaluation strategy with respect
                 to a space of query plans. We also propose a novel
                 query execution strategy called Lookahead Information
                 Passing (LIP) that is robust with respect to the space
                 of (fully pipeline-able) left-deep query plan trees for
                 in-memory star schema data warehouses. LIP ensures that
                 execution times for the best and the worst case plans
                 are far closer than without LIP. In fact, under certain
                 assumptions of independent and uniform distributions,
                 any plan in that space is theoretically guaranteed to
                 execute in near-optimal time. LIP ensures that the
                 execution time for every plan in the space is
                 nearly-optimal. In this paper, we also evaluate these
                 claims using workloads that include skew and
                 correlation. With LIP we make an initial foray into a
                 novel way of thinking about robustness from the
                 perspective of query evaluation, where we develop
                 strategies (like LIP) that collapse plan sub-spaces in
                 the overall global plan space.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Anderson:2017:BGB,
  author =       "Michael Anderson and Shaden Smith and Narayanan
                 Sundaram and Mihai Capota and Zheguang Zhao and
                 Subramanya Dulloor and Nadathur Satish and Theodore L.
                 Willke",
  title =        "Bridging the gap between {HPC} and big data
                 frameworks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "8",
  pages =        "901--912",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3090163.3090168",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Apache Spark is a popular framework for data analytics
                 with attractive features such as fault tolerance and
                 interoperability with the Hadoop ecosystem.
                 Unfortunately, many analytics operations in Spark are
                 an order of magnitude or more slower compared to native
                 implementations written with high performance computing
                 tools such as MPI. There is a need to bridge the
                 performance gap while retaining the benefits of the
                 Spark ecosystem such as availability, productivity, and
                 fault tolerance. In this paper, we propose a system for
                 integrating MPI with Spark and analyze the costs and
                 benefits of doing so for four distributed graph and
                 machine learning applications. We show that offloading
                 computation to an MPI environment from within Spark
                 provides 3.1--17.7$ \times $ speedups on the four
                 sparse applications, including all of the overheads.
                 This opens up an avenue to reuse existing MPI libraries
                 in Spark with little effort.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2017:RSS,
  author =       "Keke Huang and Sibo Wang and Glenn Bevilacqua and
                 Xiaokui Xiao and Laks V. S. Lakshmanan",
  title =        "Revisiting the stop-and-stare algorithms for influence
                 maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "9",
  pages =        "913--924",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Influence maximization is a combinatorial optimization
                 problem that finds important applications in viral
                 marketing, feed recommendation, etc. Recent research
                 has led to a number of scalable approximation
                 algorithms for influence maximization, such as TIM$^+$
                 and IMM, and more recently, SSA and D-SSA. The goal of
                 this paper is to conduct a rigorous theoretical and
                 experimental analysis of SSA and D-SSA and compare them
                 against the preceding algorithms. In doing so, we
                 uncover inaccuracies in previously reported technical
                 results on the accuracy and efficiency of SSA and
                 D-SSA, which we set right. We also attempt to reproduce
                 the original experiments on SSA and D-SSA, based on
                 which we provide interesting empirical insights. Our
                 evaluation confirms some results reported from the
                 original experiments, but it also reveals anomalies in
                 some other results and sheds light on the behavior of
                 SSA and D-SSA in some important settings not considered
                 previously. We also report on the performance of
                 SSA-Fix, our modification to SSA in order to restore
                 the approximation guarantee that was claimed for but
                 not enjoyed by SSA. Overall, our study suggests that
                 there exist opportunities for further scaling up
                 influence maximization with approximation guarantees.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2017:LSR,
  author =       "Xubo Wang and Lu Qin and Xuemin Lin and Ying Zhang and
                 Lijun Chang",
  title =        "Leveraging set relations in exact set similarity
                 join",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "9",
  pages =        "925--936",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Exact set similarity join, which finds all the similar
                 set pairs from two collections of sets, is a
                 fundamental problem with a wide range of applications.
                 The existing solutions for set similarity join follow a
                 filtering-verification framework, which generates a
                 list of candidate pairs through scanning indexes in the
                 filtering phase, and reports those similar pairs in the
                 verification phase. Though much research has been
                 conducted on this problem, set relations, which we find
                 out is quite effective on improving the algorithm
                 efficiency through computational cost sharing, have
                 never been studied. Therefore, in this paper, instead
                 of considering each set individually, we explore the
                 set relations in different levels to reduce the overall
                 computational costs. First, it has been shown that most
                 of the computational time is spent on the filtering
                 phase, which can be quadratic to the number of sets in
                 the worst case for the existing solutions. Thus we
                 explore index-level set relations to reduce the
                 filtering cost to be linear to the size of the input
                 while keeping the same filtering power. We achieve this
                 by grouping related sets into blocks in the index and
                 skipping useless index probes in joins. Second, we
                 explore answer-level set relations to further improve
                 the algorithm based on the intuition that if two sets
                 are similar, their answers may have a large overlap. We
                 derive an algorithm which incrementally generates the
                 answer of one set from an already computed answer of
                 another similar set rather than compute the answer from
                 scratch to reduce the computational cost. Finally, we
                 conduct extensive performance studies using 21 real
                 datasets with various data properties from a wide range
                 of domains. The experimental results demonstrate that
                 our algorithm outperforms all the existing algorithms
                 across all datasets and can achieve more than an order
                 of magnitude speedup against the state-of-the-art
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jiang:2017:RRW,
  author =       "Minhao Jiang and Ada Wai-Chee Fu and Raymond Chi-Wing
                 Wong",
  title =        "{READS}: a random walk approach for efficient and
                 accurate dynamic {SimRank}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "9",
  pages =        "937--948",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Similarity among entities in graphs plays a key role
                 in data analysis and mining. SimRank is a widely used
                 and popular measurement to evaluate the similarity
                 among the vertices. In real-life applications, graphs
                 do not only grow in size, requiring fast and precise
                 SimRank computation for large graphs, but also change
                 and evolve continuously over time, demanding an
                 efficient maintenance process to handle dynamic
                 updates. In this paper, we propose a random walk based
                 indexing scheme to compute SimRank efficiently and
                 accurately over large dynamic graphs. We show that our
                 algorithm outperforms the state-of-the-art static and
                 dynamic SimRank algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2017:ADC,
  author =       "Xin Huang and Laks V. S. Lakshmanan",
  title =        "Attribute-driven community search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "9",
  pages =        "949--960",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, community search over graphs has gained
                 significant interest. In applications such as analysis
                 of protein-protein interaction (PPI) networks, citation
                 graphs, and collaboration networks, nodes tend to have
                 attributes. Unfortunately, most previous community
                 search algorithms ignore attributes and result in
                 communities with poor cohesion w.r.t. their node
                 attributes. In this paper, we study the problem of
                 attribute-driven community search, that is, given an
                 undirected graph G where nodes are associated with
                 attributes, and an input query Q consisting of nodes
                 V$_q$ and attributes W$_q$, find the communities
                 containing V$_q$, in which most community members are
                 densely inter-connected and have similar attributes. We
                 formulate this problem as finding attributed truss
                 communities (ATC), i.e., finding connected and close
                 k-truss subgraphs containing V$_q$, with the largest
                 attribute relevance score. We design a framework of
                 desirable properties that good score function should
                 satisfy. We show that the problem is NP-hard. However,
                 we develop an efficient greedy algorithmic framework to
                 iteratively remove nodes with the least popular
                 attributes, and shrink the graph into an ATC. In
                 addition, we also build an elegant index to maintain
                 k-truss structure and attribute information, and propose
                 efficient query processing algorithms. Extensive
                 experiments on large real-world networks with
                 ground-truth communities show that our algorithms
                 significantly outperform the state of the art and
                 demonstrates their efficiency and effectiveness.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2017:BAS,
  author =       "Jiecao Chen and Qin Zhang",
  title =        "Bias-aware sketches",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "9",
  pages =        "961--972",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Linear sketching algorithms have been widely used for
                 processing large-scale distributed and streaming
                 datasets. Their popularity is largely due to the fact
                 that linear sketches can be naturally composed in the
                 distributed model and be efficiently updated in the
                 streaming model. The errors of linear sketches are
                 typically expressed in terms of the sum of coordinates
                 of the input vector excluding those largest ones, or,
                 the mass on the tail of the vector. Thus, the
                 precondition for these algorithms to perform well is
                 that the mass on the tail is small, which is, however,
                 not always the case --- in many real-world datasets the
                 coordinates of the input vector have a bias, which will
                 generate a large mass on the tail. In this paper we
                  propose linear sketches that are bias-aware. We
                 rigorously prove that they achieve strictly better
                 error guarantees than the corresponding existing
                 sketches, and demonstrate their practicality and
                 superiority via an extensive experimental evaluation on
                 both real and synthetic datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2017:DDA,
  author =       "Yang Cao and Wenfei Fan",
  title =        "Data driven approximation with bounded resources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "9",
  pages =        "973--984",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper proposes BEAS, a resource-bounded scheme
                 for querying relations. It is parameterized with a
                 resource ratio $ \alpha \in (0, 1] $, indicating that
                 given a big dataset D, we can only afford to access an
                 $ \alpha $-fraction of D with limited resources. For a
                 query Q posed on D, BEAS computes exact answers Q(D) if
                 doable and otherwise approximate answers, by accessing
                 at most $ \alpha | D |$ amount of data in the entire
                 process. Underlying BEAS are (1) an access schema,
                 which helps us identify and fetch the part of data
                 needed to answer Q, (2) an accuracy measure to assess
                 approximate answers in terms of their relevance and
                 coverage w.r.t. exact answers, (3) an Approximability
                 Theorem for the feasibility of resource-bounded
                 approximation, and (4) algorithms for query evaluation
                 with bounded resources. A unique feature of BEAS is its
                 ability to answer unpredictable queries, aggregate or
                 not, using bounded resources and assuring a
                 deterministic accuracy lower bound. Using real-life and
                 synthetic data, we empirically verify the effectiveness
                 and efficiency of BEAS.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khayyat:2017:ELF,
  author =       "Zuhair Khayyat and William Lucia and Meghna Singh and
                 Mourad Ouzzani and Paolo Papotti and Jorge-Arnulfo
                 Quian{\'e}-Ruiz and Nan Tang and Panos Kalnis",
  title =        "Errata for {``Lightning Fast and Space Efficient
                 Inequality Joins'' (PVLDB 8(13): 2074--2085)}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "9",
  pages =        "985--985",
  month =        may,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See \cite{Khayyat:2015:LFS}.",
  abstract =     "This is in response to recent feedback from some
                 readers, which requires some clarifications regarding
                 our IEJoin algorithm published in [1]. The feedback
                 revolves around four points: (1) a typo in our
                 illustrating example of the join process; (2) a naming
                 error for the index used by our algorithm to improve
                 the bit array scan; (3) the sort order used in our
                 algorithms; and (4) a missing explanation on how
                 duplicates are handled by our self join algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qin:2017:SAG,
  author =       "Chengjie Qin and Martin Torres and Florin Rusu",
  title =        "Scalable asynchronous gradient descent optimization
                 for out-of-core models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "986--997",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Existing data analytics systems have approached
                 predictive model training exclusively from a
                 data-parallel perspective. Data examples are
                 partitioned to multiple workers and training is
                 executed concurrently over different partitions, under
                 various synchronization policies that emphasize speedup
                 or convergence. Since models with millions and even
                 billions of features become increasingly common
                 nowadays, model management becomes an equally important
                 task for effective training. In this paper, we present
                 a general framework for parallelizing stochastic
                 optimization algorithms over massive models that cannot
                 fit in memory. We extend the lock-free HOGWILD!-family
                 of algorithms to disk-resident models by vertically
                 partitioning the model offline and asynchronously
                 updating the resulting partitions online. Unlike
                 HOGWILD!, concurrent requests to the common model are
                 minimized by a preemptive push-based sharing mechanism
                 that reduces the number of disk accesses. Experimental
                 results on real and synthetic datasets show that the
                 proposed framework achieves improved convergence over
                 HOGWILD! and is the only solution scalable to massive
                 models.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2017:WEM,
  author =       "Fan Zhang and Ying Zhang and Lu Qin and Wenjie Zhang
                 and Xuemin Lin",
  title =        "When engagement meets similarity: efficient $ (k,
                 r)$-core computation on social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "998--1009",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we investigate the problem of $ (k,
                 r)$-core which intends to find cohesive subgraphs on
                 social networks considering both user engagement and
                 similarity perspectives. In particular, we adopt the
                 popular concept of $k$-core to guarantee the engagement
                 of the users (vertices) in a group (subgraph) where
                 each vertex in a $ (k, r)$-core connects to at least k
                 other vertices. Meanwhile, we consider the pairwise
                 similarity among users based on their attributes.
                 Efficient algorithms are proposed to enumerate all
                 maximal $ (k, r)$-cores and find the maximum $ (k,
                 r)$-core, where both problems are shown to be NP-hard.
                 Effective pruning techniques substantially reduce the
                 search space of two algorithms. A novel $ (k, k')$-core
                 based $ (k, r)$-core size upper bound enhances
                 performance of the maximum $ (k, r)$-core computation.
                 We also devise effective search orders for two mining
                 algorithms where search priorities for vertices are
                 different. Comprehensive experiments on real-life data
                 demonstrate that the maximal/maximum $ (k, r)$-cores
                 enable us to find interesting cohesive subgraphs, and
                 performance of two mining algorithms is effectively
                 improved by proposed techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2017:EEP,
  author =       "Yiding Liu and Tuan-Anh Nguyen Pham and Gao Cong and
                 Quan Yuan",
  title =        "An experimental evaluation of point-of-interest
                 recommendation in location-based social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1010--1021",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Point-of-interest (POI) recommendation is an important
                 service to Location-Based Social Networks (LBSNs) that
                 can benefit both users and businesses. In recent years,
                 a number of POI recommender systems have been proposed,
                 but there is still a lack of systematical comparison
                 thereof. In this paper, we provide an all-around
                 evaluation of 12 state-of-the-art POI recommendation
                 models. From the evaluation, we obtain several
                 important findings, based on which we can better
                 understand and utilize POI recommendation models in
                 various scenarios. We anticipate this work to provide
                 readers with an overall picture of the cutting-edge
                 research on POI recommendation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Raasveldt:2017:DHM,
  author =       "Mark Raasveldt and Hannes M{\"u}hleisen",
  title =        "Don't hold my data hostage: a case for client protocol
                 redesign",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1022--1033",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Transferring a large amount of data from a database to
                 a client program is a surprisingly expensive operation.
                 The time this requires can easily dominate the query
                 execution time for large result sets. This represents a
                 significant hurdle for external data analysis, for
                 example when using statistical software. In this paper,
                 we explore and analyse the result set serialization
                 design space. We present experimental results from a
                 large chunk of the database market and show the
                 inefficiencies of current approaches. We then propose a
                 columnar serialization method that improves
                 transmission performance by an order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2017:AJJ,
  author =       "Erkang Zhu and Yeye He and Surajit Chaudhuri",
  title =        "Auto-join: joining tables by leveraging
                 transformations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1034--1045",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional equi-join relies solely on string equality
                 comparisons to perform joins. However, in scenarios
                 such as ad-hoc data analysis in spreadsheets, users
                 increasingly need to join tables whose join-columns are
                 from the same semantic domain but use different textual
                 representations, for which transformations are needed
                 before equi-join can be performed. We developed
                 Auto-Join, a system that can automatically search over
                 a rich space of operators to compose a transformation
                 program, whose execution makes input tables
                 equi-join-able. We developed an optimal sampling
                 strategy that allows Auto-Join to scale to large
                 datasets efficiently, while ensuring joins succeed with
                 high probability. Our evaluation using real test cases
                 collected from both public web tables and proprietary
                 enterprise tables shows that the proposed system
                 performs the desired transformation joins efficiently
                 and with high quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2017:TSD,
  author =       "Aoqian Zhang and Shaoxu Song and Jianmin Wang and
                 Philip S. Yu",
  title =        "Time series data cleaning: from anomaly detection to
                 anomaly repairing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1046--1057",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Errors are prevalent in time series data, such as GPS
                 trajectories or sensor readings. Existing methods focus
                 more on anomaly detection but not on repairing the
                 detected anomalies. By simply filtering out the dirty
                 data via anomaly detection, applications could still be
                 unreliable over the incomplete time series. Instead of
                 simply discarding anomalies, we propose to
                 (iteratively) repair them in time series data, by
                 creatively bonding the beauty of temporal nature in
                 anomaly detection with the widely considered minimum
                 change principle in data repairing. Our major
                 contributions include: (1) a novel framework of
                 iterative minimum repairing (IMR) over time series
                 data, (2) explicit analysis on convergence of the
                 proposed iterative minimum repairing, and (3) efficient
                 estimation of parameters in each iteration. Remarkably,
                 with incremental computation, we reduce the complexity
                  of parameter estimation from O(n) to O(1).
                 Experiments on real datasets demonstrate the
                 superiority of our proposal compared to the
                 state-of-the-art approaches. In particular, we show
                 that (the proposed) repairing indeed improves the time
                 series classification application.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2017:PBM,
  author =       "Lu Chen and Yunjun Gao and Baihua Zheng and Christian
                 S. Jensen and Hanyu Yang and Keyu Yang",
  title =        "Pivot-based metric indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1058--1069",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The general notion of a metric space encompasses a
                 diverse range of data types and accompanying similarity
                 measures. Hence, metric search plays an important role
                 in a wide range of settings, including multimedia
                 retrieval, data mining, and data integration. With the
                 aim of accelerating metric search, a collection of
                 pivot-based indexing techniques for metric data has
                 been proposed, which reduces the number of potentially
                 expensive similarity comparisons by exploiting the
                 triangle inequality for pruning and validation.
                 However, no comprehensive empirical study of those
                 techniques exists. Existing studies each offers only a
                 narrower coverage, and they use different pivot
                 selection strategies that affect performance
                 substantially and thus render cross-study comparisons
                 difficult or impossible. We offer a survey of existing
                 pivot-based indexing techniques, and report a
                 comprehensive empirical comparison of their
                 construction costs, update efficiency, storage sizes,
                 and similarity search performance. As part of the
                 study, we provide modifications for two existing
                 indexing techniques to make them more competitive. The
                 findings and insights obtained from the study reveal
                 different strengths and weaknesses of different
                 indexing techniques, and offer guidance on selecting an
                 appropriate indexing technique for a given setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guerraoui:2017:HRW,
  author =       "Rachid Guerraoui and Anne-Marie Kermarrec and Tao Lin
                 and Rhicheek Patra",
  title =        "Heterogeneous recommendations: what you might like to
                 read after watching interstellar",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1070--1081",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recommenders, as widely implemented nowadays by major
                 e-commerce players like Netflix or Amazon, use
                 collaborative filtering to suggest the most relevant
                 items to their users. Clearly, the effectiveness of
                 recommenders depends on the data they can exploit,
                 i.e., the feedback of users conveying their
                 preferences, typically based on their past ratings. As
                 of today, most recommenders are homogeneous in the
                 sense that they utilize one specific application at a
                 time. In short, Alice will only get recommended a movie
                 if she has been rating movies. But what if she has been
                 only rating books and would like to get recommendations
                 for a movie? Clearly, the multiplicity of web
                 applications is calling for heterogeneous recommenders
                 that could utilize ratings in one application to
                 provide recommendations in another one. This paper
                  presents X-Map, a heterogeneous recommender. X-Map
                 leverages meta-paths between heterogeneous items over
                 several application domains, based on users who rated
                 across these domains. These meta-paths are then used in
                  X-Map to generate, for every user, a profile (AlterEgo)
                  in a domain where the user might not have rated any
                 item yet. Not surprisingly, leveraging meta-paths poses
                 non-trivial issues of (a) meta-path-based inter-item
                 similarity, in order to enable accurate predictions,
                 (b) scalability, given the amount of computation
                 required, as well as (c) privacy, given the need to
                 aggregate information across multiple applications. We
                  show in this paper how X-Map addresses the
                 above-mentioned issues to achieve accuracy, scalability
                 and differential privacy. In short, X-Map weights the
                 meta-paths based on several factors to compute
                 inter-item similarities, and ensures scalability
                 through a layer-based pruning technique. X-Map
                 guarantees differential privacy using an exponential
                 scheme that leverages the meta-path-based similarities
                 while determining the probability of item selection to
                 construct the AlterEgos. We present an exhaustive
                 experimental evaluation of X-Map using real traces from
                 Amazon. We show that, in terms of accuracy, X-Map
                 outperforms alternative heterogeneous recommenders and,
                 in terms of throughput, X-Map achieves a linear speedup
                 with an increasing number of machines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deng:2017:SEM,
  author =       "Dong Deng and Albert Kim and Samuel Madden and Michael
                 Stonebraker",
  title =        "{SilkMoth}: an efficient method for finding related
                 sets with maximum matching constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1082--1093",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Determining if two sets are related --- that is, if
                 they have similar values or if one set contains the
                 other --- is an important problem with many
                 applications in data cleaning, data integration, and
                 information retrieval. For example, set relatedness can
                 be a useful tool to discover whether columns from two
                 different databases are joinable; if enough of the
                 values in the columns match, it may make sense to join
                 them. A common metric is to measure the relatedness of
                 two sets by treating the elements as vertices of a
                 bipartite graph and calculating the score of the
                 maximum matching pairing between elements. Compared to
                 other metrics which require exact matchings between
                 elements, this metric uses a similarity function to
                 compare elements between the two sets, making it robust
                 to small dissimilarities in elements and more useful
                 for real-world, dirty data. Unfortunately, the metric
                  suffers from expensive computational cost, taking
                  O(n$^3$) time, where n is the number of elements in the
                 sets, for each set-to-set comparison. Thus for
                 applications that try to search for all pairings of
                 related sets in a brute-force manner, the runtime
                 becomes unacceptably large. To address this challenge,
                 we developed SilkMoth, a system capable of rapidly
                 discovering related set pairs in collections of sets.
                 Internally, SilkMoth creates a signature for each set,
                 with the property that any other set which is related
                 must match the signature. SilkMoth then uses these
                 signatures to prune the search space, so only sets that
                 match the signatures are left as candidates. Finally,
                 SilkMoth applies the maximum matching metric on
                 remaining candidates to verify which of these
                 candidates are truly related sets. An important
                 property of SilkMoth is that it is guaranteed to output
                 exactly the same related set pairings as the
                 brute-force method, unlike approximate techniques.
                 Thus, a contribution of this paper is the
                 characterization of the space of signatures which
                 enable this property. We show that selecting the
                 optimal signature in this space is NP-complete, and
                 based on insights from the characterization of the
                 space, we propose two novel filters which help to prune
                 the candidates further before verification. In
                 addition, we introduce a simple optimization to the
                 calculation of the maximum matching metric itself based
                 on the triangle inequality. Compared to related
                 approaches, SilkMoth is much more general, handling a
                 larger space of similarity functions and relatedness
                 metrics, and is an order of magnitude more efficient on
                 real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chung:2017:DQM,
  author =       "Yeounoh Chung and Sanjay Krishnan and Tim Kraska",
  title =        "A data quality metric {(DQM)}: how to estimate the
                 number of undetected errors in data sets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1094--1105",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data cleaning, whether manual or algorithmic, is
                 rarely perfect leaving a dataset with an unknown number
                 of false positives and false negatives after cleaning.
                 In many scenarios, quantifying the number of remaining
                 errors is challenging because our data integrity rules
                 themselves may be incomplete, or the available
                 gold-standard datasets may be too small to extrapolate.
                 As the use of inherently fallible crowds becomes more
                 prevalent in data cleaning problems, it is important to
                 have estimators to quantify the extent of such errors.
                 We propose novel species estimators to estimate the
                 number of distinct remaining errors in a dataset after
                 it has been cleaned by a set of crowd workers ---
                 essentially, quantifying the utility of hiring
                 additional workers to clean the dataset. This problem
                 requires new estimators that are robust to false
                 positives and false negatives, and we empirically show
                 on three real-world datasets that existing species
                 estimators are unstable for this problem, while our
                 proposed techniques quickly converge.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Olma:2017:SCT,
  author =       "Matthaios Olma and Manos Karpathiotakis and Ioannis
                 Alagiannis and Manos Athanassoulis and Anastasia
                 Ailamaki",
  title =        "{Slalom}: coasting through raw data via adaptive
                 partitioning and indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1106--1117",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The constant flux of data and queries alike has been
                 pushing the boundaries of data analysis systems. The
                 increasing size of raw data files has made data loading
                 an expensive operation that delays the data-to-insight
                 time. Hence, recent in-situ query processing systems
                 operate directly over raw data, alleviating the loading
                 cost. At the same time, analytical workloads have
                 increasing number of queries. Typically, each query
                 focuses on a constantly shifting --- yet small ---
                 range. Minimizing the workload latency, now, requires
                 the benefits of indexing in in-situ query processing.
                 In this paper, we present Slalom, an in-situ query
                 engine that accommodates workload shifts by monitoring
                 user access patterns. Slalom makes on-the-fly
                 partitioning and indexing decisions, based on
                 information collected by lightweight monitoring. Slalom
                 has two key components: (i) an online partitioning and
                 indexing scheme, and (ii) a partitioning and indexing
                 tuner tailored for in-situ query engines. When compared
                 to the state of the art, Slalom offers performance
                 benefits by taking into account user query patterns to
                 (a) logically partition raw data files and (b) build
                 for each partition lightweight partition-specific
                 indexes. Due to its lightweight and adaptive nature,
                 Slalom achieves efficient accesses to raw data with
                 minimal memory consumption. Our experimentation with
                 both micro-benchmarks and real-life workloads shows
                 that Slalom outperforms state-of-the-art in-situ
                 engines (3--10$ \times $), and achieves comparable
                 query response times with fully indexed DBMS, offering
                 much lower ($ \approx 3 \times $) cumulative query
                 execution times for query workloads with increasing
                 size and unpredictable access patterns.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:MFJ,
  author =       "Yinan Li and Nikos R. Katsipoulakis and Badrish
                 Chandramouli and Jonathan Goldstein and Donald
                 Kossmann",
  title =        "{Mison}: a fast {JSON} parser for data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1118--1129",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The growing popularity of the JSON format has fueled
                 increased interest in loading and processing JSON data
                 within analytical data processing systems. However, in
                 many applications, JSON parsing dominates performance
                 and cost. In this paper, we present a new JSON parser
                 called Mison that is particularly tailored to this
                 class of applications, by pushing down both projection
                 and filter operators of analytical queries into the
                 parser. To achieve these features, we propose to
                 deviate from the traditional approach of building
                 parsers using finite state machines (FSMs). Instead, we
                 follow a two-level approach that enables the parser to
                 jump directly to the correct position of a queried
                 field without having to perform expensive tokenizing
                 steps to find the field. At the upper level, Mison
                 speculatively predicts the logical locations of queried
                 fields based on previously seen patterns in a dataset.
                 At the lower level, Mison builds structural indices on
                 JSON data to map logical locations to physical
                 locations. Unlike all existing FSM-based parsers,
                 building structural indices converts control flow into
                 data flow, thereby largely eliminating inherently
                 unpredictable branches in the program and exploiting
                 the parallelism available in modern processors. We
                 experimentally evaluate Mison using representative
                 real-world JSON datasets and the TPC-H benchmark, and
                 show that Mison produces significant performance
                 benefits over the best existing JSON parsers; in some
                 cases, the performance improvement is over one order of
                 magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2017:OBV,
  author =       "Silu Huang and Liqi Xu and Jialin Liu and Aaron J.
                 Elmore and Aditya Parameswaran",
  title =        "{OrpheusDB}: bolt-on versioning for relational
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1130--1141",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data science teams often collaboratively analyze
                 datasets, generating dataset versions at each stage of
                 iterative exploration and analysis. There is a pressing
                 need for a system that can support dataset versioning,
                 enabling such teams to efficiently store, track, and
                 query across dataset versions. We introduce OrpheusDB,
                  a dataset version control system that ``bolts on''
                 versioning capabilities to a traditional relational
                 database system, thereby gaining the analytics
                 capabilities of the database ``for free''. We develop
                 and evaluate multiple data models for representing
                 versioned data, as well as a light-weight partitioning
                 scheme, LyreSplit, to further optimize the models for
                 reduced query latencies. With LyreSplit, OrpheusDB is
                 on average $ 10^3 \times $ faster in finding effective
                 (and better) partitionings than competing approaches,
                 while also reducing the latency of version retrieval by
                 up to $ 20 \times $ relative to schemes without
                 partitioning. LyreSplit can be applied in an online
                 fashion as new versions are added, alongside an
                 intelligent migration scheme that reduces migration
                 time by $ 10 \times $ on average.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Galakatos:2017:RRA,
  author =       "Alex Galakatos and Andrew Crotty and Emanuel Zgraggen
                 and Carsten Binnig and Tim Kraska",
  title =        "Revisiting reuse for approximate query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1142--1153",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Visual data exploration tools allow users to quickly
                 gather insights from new datasets. As dataset sizes
                 continue to increase, though, new techniques will be
                 necessary to maintain the interactivity guarantees that
                 these tools require. Approximate query processing (AQP)
                 attempts to tackle this problem and allows systems to
                 return query results at ``human speed.'' However,
                 existing AQP techniques start to break down when
                 confronted with ad hoc queries that target the tails of
                 the distribution. We therefore present an AQP
                 formulation that can provide low-error approximate
                 results at interactive speeds, even for queries over
                 rare subpopulations. In particular, our formulation
                 treats query results as random variables in order to
                 leverage the ample opportunities for result reuse
                 inherent in interactive data exploration. As part of
                 our approach, we apply a variety of optimization
                 techniques that are based on probability theory,
                 including new query rewrite rules and index structures.
                 We implemented these techniques in a prototype system
                 and show that they can achieve interactivity where
                 alternative approaches cannot.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Orr:2017:PDS,
  author =       "Laurel Orr and Magdalena Balazinska and Dan Suciu",
  title =        "Probabilistic database summarization for interactive
                 data exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "10",
  pages =        "1154--1165",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 17:12:46 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a probabilistic approach to generate a
                 small, query-able summary of a dataset for interactive
                 data exploration. Departing from traditional
                 summarization techniques, we use the Principle of
                 Maximum Entropy to generate a probabilistic
                 representation of the data that can be used to give
                 approximate query answers. We develop the theoretical
                 framework and formulation of our probabilistic
                 representation and show how to use it to answer
                 queries. We then present solving techniques and give
                 three critical optimizations to improve preprocessing
                 time and query accuracy. Lastly, we experimentally
                 evaluate our work using a 5 GB dataset of flights
                 within the United States and a 210 GB dataset from an
                 astronomy particle simulation. While our current work
                 only supports linear queries, we show that our
                 technique can successfully answer queries faster than
                 sampling while introducing, on average, no more error
                 than sampling and can better distinguish between rare
                 and nonexistent values.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Oukid:2017:MMT,
  author =       "Ismail Oukid and Daniel Booss and Adrien Lespinasse
                 and Wolfgang Lehner and Thomas Willhalm and
                 Gr{\'e}goire Gomes",
  title =        "Memory management techniques for large-scale
                 persistent-main-memory systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1166--1177",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Storage Class Memory (SCM) is a novel class of memory
                 technologies that promise to revolutionize database
                 architectures. SCM is byte-addressable and exhibits
                 latencies similar to those of DRAM, while being
                 non-volatile. Hence, SCM could replace both main memory
                 and storage, enabling a novel single-level database
                 architecture without the traditional I/O bottleneck.
                 Fail-safe persistent SCM allocation can be considered
                 conditio sine qua non for enabling this novel
                 architecture paradigm for database management systems.
                 In this paper we present PAllocator, a fail-safe
                 persistent SCM allocator whose design emphasizes high
                 concurrency and capacity scalability. Contrary to
                 previous works, PAllocator thoroughly addresses the
                 important challenge of persistent memory fragmentation
                 by implementing an efficient defragmentation algorithm.
                 We show that PAllocator outperforms state-of-the-art
                 persistent allocators by up to one order of magnitude,
                 both in operation throughput and recovery time, and
                 enables up to $ 2.39 \times $ higher operation
                 throughput on a persistent B-Tree.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shang:2017:TSJ,
  author =       "Shuo Shang and Lisi Chen and Zhewei Wei and Christian
                 S. Jensen and Kai Zheng and Panos Kalnis",
  title =        "Trajectory similarity join in spatial networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1178--1189",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The matching of similar pairs of objects, called
                 similarity join, is fundamental functionality in data
                 management. We consider the case of trajectory
                 similarity join (TS-Join), where the objects are
                 trajectories of vehicles moving in road networks. Thus,
                 given two sets of trajectories and a threshold $ \theta
                 $, the TS-Join returns all pairs of trajectories from
                 the two sets with similarity above $ \theta $. This
                 join targets applications such as trajectory
                 near-duplicate detection, data cleaning, ridesharing
                 recommendation, and traffic congestion prediction. With
                 these applications in mind, we provide a purposeful
                 definition of similarity. To enable efficient TS-Join
                 processing on large sets of trajectories, we develop
                 search space pruning techniques and take into account
                 the parallel processing capabilities of modern
                 processors. Specifically, we present a two-phase
                 divide-and-conquer algorithm. For each trajectory, the
                 algorithm first finds similar trajectories. Then it
                 merges the results to achieve a final result. The
                 algorithm exploits an upper bound on the spatiotemporal
                 similarity and a heuristic scheduling strategy for
                 search space pruning. The algorithm's per-trajectory
                 searches are independent of each other and can be
                 performed in parallel, and the merging has constant
                 cost. An empirical study with real data offers insight
                 in the performance of the algorithm and demonstrates
                  that it is capable of outperforming a well-designed
                 baseline algorithm by an order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rekatsinas:2017:HHD,
  author =       "Theodoros Rekatsinas and Xu Chu and Ihab F. Ilyas and
                 Christopher R{\'e}",
  title =        "{HoloClean}: holistic data repairs with probabilistic
                 inference",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1190--1201",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce HoloClean, a framework for holistic data
                 repairing driven by probabilistic inference. HoloClean
                 unifies qualitative data repairing, which relies on
                 integrity constraints or external data sources, with
                 quantitative data repairing methods, which leverage
                 statistical properties of the input data. Given an
                 inconsistent dataset as input, HoloClean automatically
                 generates a probabilistic program that performs data
                 repairing. Inspired by recent theoretical advances in
                 probabilistic inference, we introduce a series of
                 optimizations which ensure that inference over
                 HoloClean's probabilistic model scales to instances
                 with millions of tuples. We show that HoloClean finds
                 data repairs with an average precision of $ \approx $
                 90\% and an average recall of above $ \approx $ 76\%
                 across a diverse array of datasets exhibiting different
                 types of errors. This yields an average F1 improvement
                 of more than $ 2 \times $ against state-of-the-art
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Istvan:2017:CID,
  author =       "Zsolt Istv{\'a}n and David Sidler and Gustavo Alonso",
  title =        "{Caribou}: intelligent distributed storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1202--1213",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ever increasing amount of data being handled in
                 data centers causes an intrinsic inefficiency: moving
                 data around is expensive in terms of bandwidth,
                 latency, and power consumption, especially given the
                 low computational complexity of many database
                 operations. In this paper we explore near-data
                 processing in database engines, i.e., the option of
                 offloading part of the computation directly to the
                 storage nodes. We implement our ideas in Caribou, an
                 intelligent distributed storage layer incorporating
                 many of the lessons learned while building systems with
                 specialized hardware. Caribou provides access to
                 DRAM/NVRAM storage over the network through a simple
                 key--value store interface, with each storage node
                 providing high-bandwidth near-data processing at line
                 rate and fault tolerance through replication. The
                 result is a highly efficient, distributed, intelligent
                 data storage that can be used to both boost performance
                 and reduce power consumption and real estate usage in
                 the data center thanks to the micro-server architecture
                 adopted.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2017:TLA,
  author =       "Lingjiao Chen and Arun Kumar and Jeffrey Naughton and
                 Jignesh M. Patel",
  title =        "Towards linear algebra over normalized data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1214--1225",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Providing machine learning (ML) over relational data
                 is a mainstream requirement for data analytics systems.
                 While almost all ML tools require the input data to be
                 presented as a single table, many datasets are
                 multi-table. This forces data scientists to join those
                 tables first, which often leads to data redundancy and
                 runtime waste. Recent works on ``factorized'' ML
                 mitigate this issue for a few specific ML algorithms by
                 pushing ML through joins. But their approaches require
                 a manual rewrite of ML implementations. Such piecemeal
                 methods create a massive development overhead when
                 extending such ideas to other ML algorithms. In this
                 paper, we show that it is possible to mitigate this
                 overhead by leveraging a popular formal algebra to
                 represent the computations of many ML algorithms:
                 linear algebra. We introduce a new logical data type to
                 represent normalized data and devise a framework of
                 algebraic rewrite rules to convert a large set of
                 linear algebra operations over denormalized data into
                 operations over normalized data. We show how this
                 enables us to automatically ``factorize'' several
                 popular ML algorithms, thus unifying and generalizing
                 several prior works. We prototype our framework in the
                 popular ML environment R and an industrial R-over-RDBMS
                 tool. Experiments with both synthetic and real
                 normalized data show that our framework also yields
                 significant speed-ups, up to $ 36 \times $ on real
                 data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mehta:2017:CEB,
  author =       "Parmita Mehta and Sven Dorkenwald and Dongfang Zhao
                 and Tomer Kaftan and Alvin Cheung and Magdalena
                 Balazinska and Ariel Rokem and Andrew Connolly and
                 Jacob Vanderplas and Yusra AlSayyad",
  title =        "Comparative evaluation of big-data systems on
                 scientific image analytics workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1226--1237",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Scientific discoveries are increasingly driven by
                 analyzing large volumes of image data. Many new
                 libraries and specialized database management systems
                 (DBMSs) have emerged to support such tasks. It is
                 unclear how well these systems support real-world image
                 analysis use cases, and how performant the image
                 analytics tasks implemented on top of such systems are.
                 In this paper, we present the first comprehensive
                 evaluation of large-scale image analysis systems using
                 two real-world scientific image data processing use
                 cases. We evaluate five representative systems (SciDB,
                 Myria, Spark, Dask, and TensorFlow) and find that each
                 of them has shortcomings that complicate implementation
                 or hurt performance. Such shortcomings lead to new
                 research opportunities in making large-scale image
                 analysis both efficient and easy to use.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aslay:2017:RMI,
  author =       "Cigdem Aslay and Francesco Bonchi and Laks V. S.
                 Lakshmanan and Wei Lu",
  title =        "Revenue maximization in incentivized social
                 advertising",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1238--1249",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Incentivized social advertising, an emerging marketing
                 model, provides monetization opportunities not only to
                 the owners of the social networking platforms but also
                 to their influential users by offering a ``cut'' on the
                 advertising revenue. We consider a social network (the
                 host) that sells ad-engagements to advertisers by
                 inserting their ads, in the form of promoted posts,
                 into the feeds of carefully selected ``initial
                 endorsers'' or seed users: these users receive monetary
                 incentives in exchange for their endorsements. The
                 endorsements help propagate the ads to the feeds of
                 their followers. Whenever any user engages with an ad,
                 the host is paid some fixed amount by the advertiser,
                 and the ad further propagates to the feed of her
                 followers, potentially recursively. In this context,
                  the problem for the host is to allocate ads to
                 influential users, taking into account the propensity
                 of ads for viral propagation, and carefully
                 apportioning the monetary budget of each of the
                 advertisers between incentives to influential users and
                 ad-engagement costs, with the rational goal of
                 maximizing its own revenue. We show that, taking all
                 important factors into account, the problem of revenue
                 maximization in incentivized social advertising
                 corresponds to the problem of monotone submodular
                 function maximization, subject to a partition matroid
                 constraint on the ads-to-seeds allocation, and
                 submodular knapsack constraints on the advertisers'
                 budgets. We show that this problem is NP-hard and
                 devise two greedy algorithms with provable
                 approximation guarantees, which differ in their
                 sensitivity to seed user incentive costs. Our
                 approximation algorithms require repeatedly estimating
                 the expected marginal gain in revenue as well as in
                 advertiser payment. By exploiting a connection to the
                 recent advances made in scalable estimation of expected
                 influence spread, we devise efficient and scalable
                 versions of our two greedy algorithms. An extensive
                 experimental assessment confirms the high quality of
                 our proposal.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rupprecht:2017:SNA,
  author =       "Lukas Rupprecht and William Culhane and Peter
                 Pietzuch",
  title =        "{SquirrelJoin}: network-aware distributed join
                 processing with lazy partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1250--1261",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To execute distributed joins in parallel on compute
                 clusters, systems partition and exchange data records
                 between workers. With large datasets, workers spend a
                 considerable amount of time transferring data over the
                 network. When compute clusters are shared among
                 multiple applications, workers must compete for network
                 bandwidth with other applications. These variances in
                 the available network bandwidth lead to network skew,
                 which causes straggling workers to prolong the join
                 completion time. We describe SquirrelJoin, a
                 distributed join processing technique that uses lazy
                 partitioning to adapt to transient network skew in
                 clusters. Workers maintain in-memory lazy partitions to
                 withhold a subset of records, i.e. not sending them
                 immediately to other workers for processing. Lazy
                 partitions are then assigned dynamically to other
                 workers based on network conditions: each worker takes
                 periodic throughput measurements to estimate its
                 completion time, and lazy partitions are allocated as
                 to minimise the join completion time. We implement
                 SquirrelJoin as part of the Apache Flink distributed
                 dataflow framework and show that, under transient
                 network contention in a shared compute cluster,
                 SquirrelJoin speeds up join completion times by up to $
                 2.9 \times $ with only a small, fixed overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rahman:2017:ISE,
  author =       "Sajjadur Rahman and Maryam Aliakbarpour and Ha Kyung
                 Kong and Eric Blais and Karrie Karahalios and Aditya
                 Parameswaran and Ronitt Rubinfeld",
  title =        "{I}'ve seen ``enough'': incrementally improving
                 visualizations to support rapid decision making",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1262--1273",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data visualization is an effective mechanism for
                 identifying trends, insights, and anomalies in data. On
                 large datasets, however, generating visualizations can
                 take a long time, delaying the extraction of insights,
                 hampering decision making, and reducing exploration
                 time. One solution is to use online sampling-based
                 schemes to generate visualizations faster while
                 improving the displayed estimates incrementally,
                 eventually converging to the exact visualization
                 computed on the entire data. However, the intermediate
                 visualizations are approximate, and often fluctuate
                 drastically, leading to potentially incorrect
                 decisions. We propose sampling-based incremental
                 visualization algorithms that reveal the ``salient''
                 features of the visualization quickly --- with a $ 46
                 \times $ speedup relative to baselines --- while
                 minimizing error, thus enabling rapid and error-free
                 decision making. We demonstrate that these algorithms
                 are optimal in terms of sample complexity, in that
                 given the level of interactivity, they generate
                 approximations that take as few samples as possible. We
                 have developed the algorithms in the context of an
                 incremental visualization tool, titled IncVisage, for
                 trendline and heatmap visualizations. We evaluate the
                 usability of IncVisage via user studies and demonstrate
                 that users are able to make effective decisions with
                 incrementally improving visualizations, especially
                 compared to vanilla online-sampling based schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:MRT,
  author =       "Lei Li and Wen Hua and Xingzhong Du and Xiaofang
                 Zhou",
  title =        "Minimal on-road time route scheduling on
                 time-dependent graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1274--1285",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "On time-dependent graphs, fastest path query is an
                 important problem and has been well studied. It focuses
                 on minimizing the total travel time (waiting time +
                 on-road time) but does not allow waiting on any
                 intermediate vertex if the FIFO property is applied.
                 However, in practice, waiting on a vertex can reduce
                 the time spent on the road (for example, resuming
                 traveling after a traffic jam). In this paper, we study
                 how to find a path with the minimal on-road time on
                 time-dependent graphs by allowing waiting on some
                 predefined parking vertices. The existing works are
                 based on the following fact: the arrival time of a
                 vertex v is determined by the arrival time of its
                 in-neighbor u, which does not hold in our scenario
                 since we also consider the waiting time on u if u
                 allows waiting. Thus, determining the waiting time on
                 each parking vertex to achieve the minimal on-road time
                 becomes a big challenge, which further breaks FIFO
                 property. To cope with this challenging problem, we
                 propose two efficient algorithms using minimum on-road
                 travel cost function to answer the query. The
                 evaluations on multiple real-world time-dependent
                 graphs show that the proposed algorithms are more
                 accurate and efficient than the extensions of existing
                 algorithms. In addition, the results further indicate,
                 if the parking facilities are enabled in the route
                 scheduling algorithms, the on-road time will reduce
                 significantly compared to the fastest path
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Katsipoulakis:2017:HVS,
  author =       "Nikos R. Katsipoulakis and Alexandros Labrinidis and
                 Panos K. Chrysanthis",
  title =        "A holistic view of stream partitioning costs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1286--1297",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream processing has become the dominant processing
                 model for monitoring and real-time analytics. Modern
                 Parallel Stream Processing Engines (pSPEs) have made it
                 feasible to increase the performance in both monitoring
                 and analytical queries by parallelizing a query's
                 execution and distributing the load on multiple
                 workers. A determining factor for the performance of a
                 pSPE is the partitioning algorithm used to disseminate
                 tuples to workers. Until now, partitioning methods in
                 pSPEs have been similar to the ones used in parallel
                 databases and only recently load-aware algorithms have
                 been employed to improve the effectiveness of parallel
                 execution. We identify and demonstrate the need to
                 incorporate aggregation costs in the partitioning model
                 when executing stateful operations in parallel, in
                 order to minimize the overall latency and/or
                 throughput. Towards this, we propose new stream
                 partitioning algorithms, that consider both tuple
                 imbalance and aggregation cost. We evaluate our
                 proposed algorithms and show that they can achieve up
                 to an order of magnitude better performance, compared
                 to the current state of the art.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Akbas:2017:TBC,
  author =       "Esra Akbas and Peixiang Zhao",
  title =        "Truss-based community search: a truss-equivalence
                 based indexing approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1298--1309",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We consider the community search problem defined upon
                 a large graph G: given a query vertex q in G, to find
                 as output all the densely connected subgraphs of G,
                 each of which contains the query v. As an online,
                 query-dependent variant of the well-known community
                 detection problem, community search enables
                 personalized community discovery that has found widely
                 varying applications in real-world, large-scale graphs.
                 In this paper, we study the community search problem in
                 the truss-based model aimed at discovering all dense
                 and cohesive k-truss communities to which the query
                 vertex q belongs. We introduce a novel equivalence
                 relation, k-truss equivalence, to model the intrinsic
                 density and cohesiveness of edges in k-truss
                 communities. Consequently, all the edges of G can be
                 partitioned to a series of k-truss equivalence classes
                 that constitute a space-efficient, truss-preserving
                 index structure, EquiTruss. Community search can be
                 henceforth addressed directly upon EquiTruss without
                 repeated, time-demanding accesses to the original
                 graph, G, which proves to be theoretically optimal. In
                 addition, EquiTruss can be efficiently updated in a
                 dynamic fashion when G evolves with edge insertion and
                 deletion. Experimental studies in real-world,
                 large-scale graphs validate the efficiency and
                 effectiveness of EquiTruss, which has achieved at least
                 an order of magnitude speedup in community search over
                 the state-of-the-art method, TCP-Index.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cambronero:2017:QOD,
  author =       "Jos{\'e} Cambronero and John K. Feser and Micah J.
                 Smith and Samuel Madden",
  title =        "Query optimization for dynamic imputation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1310--1321",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Missing values are common in data analysis and present
                 a usability challenge. Users are forced to pick between
                 removing tuples with missing values or creating a
                 cleaned version of their data by applying a relatively
                 expensive imputation strategy. Our system, ImputeDB,
                 incorporates imputation into a cost-based query
                 optimizer, performing necessary imputations on-the-fly
                 for each query. This allows users to immediately
                 explore their data, while the system picks the optimal
                 placement of imputation operations. We evaluate this
                 approach on three real-world survey-based datasets. Our
                 experiments show that our query plans execute between
                 10 and 140 times faster than first imputing the base
                 tables. Furthermore, we show that the query results
                 from on-the-fly imputation differ from the traditional
                 base-table imputation approach by 0--8\%. Finally, we
                 show that while dropping tuples with missing values
                 that fail query constraints discards 6--78\% of the
                 data, on-the-fly imputation loses only 0--21\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Marchant:2017:SER,
  author =       "Neil G. Marchant and Benjamin I. P. Rubinstein",
  title =        "In search of an entity resolution {OASIS}: optimal
                 asymptotic sequential importance sampling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1322--1333",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity resolution (ER) presents unique challenges for
                 evaluation methodology. While crowdsourcing platforms
                 acquire ground truth, sound approaches to sampling must
                 drive labelling efforts. In ER, extreme class imbalance
                 between matching and non-matching records can lead to
                 enormous labelling requirements when seeking
                 statistically consistent estimates for rigorous
                 evaluation. This paper addresses this important
                 challenge with the OASIS algorithm: a sampler and
                 F-measure estimator for ER evaluation. OASIS draws
                 samples from a (biased) instrumental distribution,
                 chosen to ensure estimators with optimal asymptotic
                 variance. As new labels are collected OASIS updates
                 this instrumental distribution via a Bayesian latent
                 variable model of the annotator oracle, to quickly
                 focus on unlabelled items providing more information.
                 We prove that resulting estimates of F-measure,
                 precision, recall converge to the true population
                 values. Thorough comparisons of sampling methods on a
                 variety of ER datasets demonstrate significant
                 labelling reductions of up to 83\% without loss to
                 estimate accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tong:2017:FOT,
  author =       "Yongxin Tong and Libin Wang and Zimu Zhou and Bolin
                 Ding and Lei Chen and Jieping Ye and Ke Xu",
  title =        "Flexible online task assignment in real-time spatial
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1334--1345",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The popularity of Online To Offline (O2O) service
                 platforms has spurred the need for online task
                 assignment in real-time spatial data, where streams of
                 spatially distributed tasks and workers are matched in
                 real time such that the total number of assigned pairs
                 is maximized. Existing online task assignment models
                 assume that each worker is either assigned a task
                 immediately or waits for a subsequent task at a fixed
                 location once she/he appears on the platform. Yet in
                 practice a worker may actively move around rather than
                 passively wait in place if no task is assigned. In this
                 paper, we define a new problem Flexible Two-sided
                 Online task Assignment (FTOA). FTOA aims to guide idle
                 workers based on the prediction of tasks and workers so
                 as to increase the total number of assigned worker-task
                 pairs. To address the FTOA problem, we face two
                 challenges: (i) How to generate guidance for idle
                 workers based on the prediction of the spatiotemporal
                 distribution of tasks and workers? (ii) How to leverage
                 the guidance of workers' movements to optimize the
                 online task assignment? To this end, we propose a novel
                 two-step framework, which integrates offline prediction
                 and online task assignment. Specifically, we estimate
                 the distributions of tasks and workers per time slot
                 and per unit area, and design an online task assignment
                 algorithm, Prediction-oriented Online task Assignment
                 in Real-time spatial data (POLAR-OP). It yields a
                 0.47-competitive ratio, which is nearly twice better
                 than that of the state-of-the-art. POLAR-OP also
                 reduces the time complexity to process each
                 newly-arrived task/worker to $ O(1) $. We validate the
                 effectiveness and efficiency of our methods via
                 extensive experiments on both synthetic datasets and
                 real-world datasets from a large-scale taxi-calling
                 platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bouros:2017:FSB,
  author =       "Panagiotis Bouros and Nikos Mamoulis",
  title =        "A forward scan based plane sweep algorithm for
                 parallel interval joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1346--1357",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The interval join is a basic operation that finds
                 application in temporal, spatial, and uncertain
                 databases. Although a number of centralized and
                 distributed algorithms have been proposed for the
                 efficient evaluation of interval joins, classic plane
                 sweep approaches have not been considered at their full
                 potential. A recent piece of related work proposes an
                 optimized approach based on plane sweep (PS) for modern
                 hardware, showing that it greatly outperforms previous
                 work. However, this approach depends on the development
                 of a complex data structure and its parallelization has
                 not been adequately studied. In this paper, we explore
                 the applicability of a largely ignored forward scan
                 (FS) based plane sweep algorithm, which is extremely
                 simple to implement. We propose two optimizations of FS
                 that greatly reduce its cost, making it competitive to
                 the state-of-the-art single-threaded PS algorithm while
                 achieving a lower memory footprint. In addition, we
                 show the drawbacks of a previously proposed hash-based
                 partitioning approach for parallel join processing and
                 suggest a domain-based partitioning approach that does
                 not produce duplicate results. Within our approach we
                 propose a novel breakdown of the partition join jobs
                 into a small number of independent mini-join jobs with
                 varying cost and manage to avoid redundant comparisons.
                 Finally, we show how these mini-joins can be scheduled
                 in multiple CPU cores and propose an adaptive domain
                 partitioning, aiming at load balancing. We include an
                 experimental study that demonstrates the efficiency of
                 our optimized FS and the scalability of our
                 parallelization framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rong:2017:APA,
  author =       "Kexin Rong and Peter Bailis",
  title =        "{ASAP}: prioritizing attention via time series
                 smoothing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1358--1369",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Time series visualization of streaming telemetry
                 (i.e., charting of key metrics such as server load over
                 time) is increasingly prevalent in modern data
                 platforms and applications. However, many existing
                 systems simply plot the raw data streams as they
                 arrive, often obscuring large-scale trends due to
                 small-scale noise. We propose an alternative: to better
                 prioritize end users' attention, smooth time series
                 visualizations as much as possible to remove noise,
                 while retaining large-scale structure to highlight
                 significant deviations. We develop a new analytics
                 operator called ASAP that automatically smooths
                 streaming time series by adaptively optimizing the
                 trade-off between noise reduction (i.e., variance) and
                 trend retention (i.e., kurtosis). We introduce metrics
                 to quantitatively assess the quality of smoothed plots
                 and provide an efficient search strategy for optimizing
                 these metrics that combines techniques from stream
                 processing, user interface design, and signal
                 processing via autocorrelation-based pruning,
                 pixel-aware preaggregation, and on-demand refresh. We
                 demonstrate that ASAP can improve users' accuracy in
                 identifying long-term deviations in time series by up
                 to 38.4\% while reducing response times by up to
                 44.3\%. Moreover, ASAP delivers these results several
                 orders of magnitude faster than alternative search
                 strategies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:KVL,
  author =       "Furong Li and Xin Luna Dong and Anno Langen and Yang
                 Li",
  title =        "Knowledge verification for long-tail verticals",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1370--1381",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Collecting structured knowledge for real-world
                 entities has become a critical task for many
                 applications. A big gap between the knowledge in
                 existing knowledge repositories and the knowledge in
                 the real world is the knowledge on tail verticals
                 (i.e., less popular domains). Such knowledge, though
                 not necessarily globally popular, can be personal
                 hobbies to many people and thus collectively impactful.
                 This paper studies the problem of knowledge
                 verification for tail verticals; that is, deciding the
                 correctness of a given triple. Through comprehensive
                 experimental study we answer the following questions.
                 (1) Can we find evidence for tail knowledge from an
                 extensive set of sources, including knowledge bases,
                 the web, and query logs? (2) Can we judge correctness
                 of the triples based on the collected evidence? (3) How
                 can we further improve knowledge verification on tail
                 verticals? Our empirical study suggests a new
                 knowledge-verification framework, which we call Facty,
                 that applies various kinds of evidence collection
                 techniques followed by knowledge fusion. Facty can
                 verify 50\% of the (correct) tail knowledge with a
                 precision of 84\%, and it significantly outperforms
                 state-of-the-art methods. Detailed error analysis on
                 the obtained results suggests future research
                 directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pande:2017:SRR,
  author =       "Shiladitya Pande and Sayan Ranu and Arnab
                 Bhattacharya",
  title =        "{SkyGraph}: retrieving regions of interest using
                 skyline subgraph queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1382--1393",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Several services today are annotated with points of
                 interest (PoIs) such as ``coffee shop'', ``park'', etc.
                 A region of interest (RoI) is a neighborhood that
                 contains PoIs relevant to the user. In this paper, we
                 study the scenario where a user wants to identify the
                 best RoI in a city. The user expresses relevance
                 through a set of keywords denoting PoIs. Ideally, the
                 RoI should be small enough in size such that the user
                 can conveniently explore the PoIs. On the other hand,
                 it should be as relevant as possible. How does one
                 balance the importance of size versus relevance? To a
                 user exploring the RoI on foot, size is more critical.
                 However, for a user equipped with a vehicle, relevance
                 is a more important factor. In this paper, we solve
                 this dilemma through skyline subgraph queries on
                 keyword-embedded road networks. Skyline subgraphs
                 subsume the choice of optimization function for an RoI
                 since the optimal RoI for any rational user is
                 necessarily a part of the skyline set. Our analysis
                 reveals that the problem of computing the skyline set
                 is NP-hard. We overcome the computational bottleneck by
                 proposing a polynomial-time approximation algorithm
                 called SkyGraph. To further expedite the running time,
                 we develop an index structure, Partner Index, that
                 drastically prunes the search space and provides up to
                 3 orders of magnitude speed-up on real road networks
                 over the baseline approach. The datasets and
                 executables are available at
                 http://www.cse.iitd.ac.in/~sayan/software.html.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tan:2017:REA,
  author =       "Wei Chit Tan and Meihui Zhang and Hazem Elmeleegy and
                 Divesh Srivastava",
  title =        "Reverse engineering aggregation queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1394--1405",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query reverse engineering seeks to re-generate the SQL
                 query that produced a given query output table from a
                 given database. In this paper, we solve this problem
                 for OLAP queries with group-by and aggregation. We
                 develop a novel three-phase algorithm named REGAL
                 for this problem. First, based on a lattice graph
                 structure, we identify a set of group-by candidates for
                 the desired query. Second, we apply a set of
                 aggregation constraints that are derived from the
                 properties of aggregate operators at both the
                 table-level and the group-level to discover candidate
                 combinations of group-by columns and aggregations that
                 are consistent with the given query output table.
                 Finally, we find a multi-dimensional filter, i.e., a
                 conjunction of selection predicates over the base table
                 attributes, that is needed to generate the exact query
                 output table. We conduct an extensive experimental
                 study over the TPC-H dataset to demonstrate the
                 effectiveness and efficiency of our proposal.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yut:2017:LRL,
  author =       "Lele Yu and Ce Zhang and Yingxia Shao and Bin Cui",
  title =        "{LDA*}: a robust and large-scale topic modeling
                 system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1406--1417",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present LDA*, a system that has been deployed in
                 one of the largest Internet companies to fulfil their
                 requirements of ``topic modeling as an internal
                 service'' --- relying on thousands of machines,
                 engineers in different sectors submit their data, some
                 are as large as 1.8TB, to LDA* and get results back in
                 hours. LDA* is motivated by the observation that none
                 of the existing topic modeling systems is robust enough
                 --- Each of these existing systems is designed for a
                 specific point in the tradeoff space that can be
                 sub-optimal, sometimes by up to $ 10 \times $, across
                 workloads. Our first contribution is a systematic study
                 of all recently proposed samplers: AliasLDA, F+LDA,
                 LightLDA, and WarpLDA. We discovered a novel system
                 tradeoff among these samplers. Each sampler has
                 different sampling complexity and performs differently,
                 sometimes by $ 5 \times $, on documents with different
                 lengths. Based on this tradeoff, we further developed a
                 hybrid sampler that uses different samplers for
                 different types of documents. This hybrid approach
                 works across a wide range of workloads and outperforms
                 the fastest sampler by up to $ 2 \times $. We then
                 focused on distributed environments in which thousands
                 of workers, each with different performance (due to
                 virtualization and resource sharing), coordinate to
                 train a topic model. Our second contribution is an
                 asymmetric parameter server architecture that pushes
                 some computation to the parameter server side. This
                 architecture is motivated by the skew of the word
                 frequency distribution and a novel tradeoff we
                 discovered between communication and computation. With
                 this architecture, we outperform the traditional,
                 symmetric architecture by up to $ 2 \times $. With
                 these two contributions, together with a carefully
                 engineered implementation, our system is able to
                 outperform existing systems by up to $ 10 \times $ and
                 has already been running to provide topic modeling
                 services for more than six months.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kabiljo:2017:SHP,
  author =       "Igor Kabiljo and Brian Karrer and Mayank Pundir and
                 Sergey Pupyrev and Alon Shalita",
  title =        "Social hash partitioner: a scalable distributed
                 hypergraph partitioner",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1418--1429",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We design and implement a distributed algorithm for
                 balanced $k$-way hypergraph partitioning that minimizes
                 fanout, a fundamental hypergraph quantity also known as
                 the communication volume and $ (k - 1)$-cut metric, by
                 optimizing a novel objective called probabilistic
                 fanout. This choice allows a simple local search
                 heuristic to achieve comparable solution quality to the
                 best existing hypergraph partitioners. Our algorithm is
                 arbitrarily scalable due to a careful design that
                 controls computational complexity, space complexity,
                 and communication. In practice, we commonly process
                 hypergraphs with billions of vertices and hyperedges in
                 a few hours. We explain how the algorithm's
                 scalability, both in terms of hypergraph size and
                 bucket count, is limited only by the number of machines
                 available. We perform an extensive comparison to
                 existing distributed hypergraph partitioners and find
                 that our approach is able to optimize hypergraphs
                 roughly 100 times bigger on the same set of machines.
                 We call the resulting tool Social Hash Partitioner, and
                 accompanying this paper, we open-source the most
                 scalable version based on recursive bisection.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ahmed:2017:SMG,
  author =       "Nesreen K. Ahmed and Nick Duffield and Theodore L.
                 Willke and Ryan A. Rossi",
  title =        "On sampling from massive graph streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1430--1441",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose Graph Priority Sampling (gps), a new
                 paradigm for order-based reservoir sampling from
                 massive graph streams. gps provides a general way to
                 weight edge sampling according to auxiliary and/or size
                 variables so as to accomplish various estimation goals
                 of graph properties. In the context of subgraph
                 counting, we show how edge sampling weights can be
                 chosen so as to minimize the estimation variance of
                 counts of specified sets of subgraphs. In distinction
                 with many prior graph sampling schemes, gps separates
                 the functions of edge sampling and subgraph estimation.
                 We propose two estimation frameworks: (1) Post-Stream
                 estimation, to allow gps to construct a reference
                 sample of edges to support retrospective graph queries,
                 and (2) In-Stream estimation, to allow gps to obtain
                 lower variance estimates by incrementally updating the
                 subgraph count estimates during stream processing.
                 Unbiasedness of subgraph estimators is established
                 through a new Martingale formulation of graph stream
                 order sampling, in which subgraph estimators, written
                 as a product of constituent edge estimators, are
                 unbiased, even when computed at different points in the
                 stream. The separation of estimation and sampling
                 enables significant resource savings relative to
                 previous work. We illustrate our framework with
                 applications to triangle and wedge counting. We perform
                 a large-scale experimental study on real-world graphs
                 from various domains and types. gps achieves high
                  accuracy with $<$ 1\% error for triangle and wedge
                 counting, while storing a small fraction of the graph
                 with average update times of a few microseconds per
                 edge. Notably, for billion-scale graphs, gps accurately
                  estimates triangle and wedge counts with $<$ 1\% error,
                  while storing a small fraction of $<$ 0.01\% of the total
                 edges in the graph.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2017:PSS,
  author =       "Tong Yang and Yang Zhou and Hao Jin and Shigang Chen
                 and Xiaoming Li",
  title =        "Pyramid sketch: a sketch framework for frequency
                 estimation of data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1442--1453",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Sketch is a probabilistic data structure, and is used
                 to store and query the frequency of any item in a given
                 multiset. Due to its high memory efficiency, it has
                 been applied to various fields in computer science,
                 such as stream database, network traffic measurement,
                 etc. The key metrics of sketches for data streams are
                 accuracy, speed, and memory usage. Various sketches
                 have been proposed, but they cannot achieve both high
                 accuracy and high speed using limited memory,
                 especially for skewed datasets. To address this issue,
                 we propose a sketch framework, the Pyramid sketch,
                 which can significantly improve accuracy as well as
                 update and query speed. To verify the effectiveness and
                 efficiency of our framework, we applied our framework
                 to four typical sketches. Extensive experimental
                 results show that the accuracy is improved up to 3.50
                 times, while the speed is improved up to 2.10 times. We
                 have released our source codes at Github [1].",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ciaccia:2017:RSR,
  author =       "Paolo Ciaccia and Davide Martinenghi",
  title =        "Reconciling skyline and ranking queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1454--1465",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditionally, skyline and ranking queries have been
                 treated separately as alternative ways of discovering
                 interesting data in potentially large datasets. While
                 ranking queries adopt a specific scoring function to
                 rank tuples, skyline queries return the set of
                 non-dominated tuples and are independent of attribute
                 scales and scoring functions. Ranking queries are thus
                 less general, but usually cheaper to compute and widely
                 used in data management systems. We propose a framework
                 to seamlessly integrate these two approaches by
                 introducing the notion of restricted skyline queries
                 (R-skylines). We propose R-skyline operators that
                 generalize both skyline and ranking queries by applying
                 the notion of dominance to a set of scoring functions
                 of interest. Such sets can be characterized, e.g., by
                 imposing constraints on the function's parameters, such
                 as the weights in a linear scoring function. We discuss
                 the formal properties of these new operators, show how
                 to implement them efficiently, and evaluate them on
                 both synthetic and real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Giannakopoulou:2017:COQ,
  author =       "Stella Giannakopoulou and Manos Karpathiotakis and
                 Benjamin Gaidioz and Anastasia Ailamaki",
  title =        "{CleanM}: an optimizable query language for unified
                 scale-out data cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1466--1477",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data cleaning has become an indispensable part of data
                 analysis due to the increasing amount of dirty data.
                 Data scientists spend most of their time preparing
                 dirty data before it can be used for data analysis. At
                 the same time, the existing tools that attempt to
                 automate the data cleaning procedure typically focus on
                 a specific use case and operation. Still, even such
                 specialized tools exhibit long running times or fail to
                 process large datasets. Therefore, from a user's
                 perspective, one is forced to use a different,
                 potentially inefficient tool for each category of
                 errors. This paper addresses the coverage and
                 efficiency problems of data cleaning. It introduces
                 CleanM (pronounced clean'em), a language which can
                 express multiple types of cleaning operations. CleanM
                 goes through a three-level translation process for
                 optimization purposes; a different family of
                 optimizations is applied in each abstraction level.
                 Thus, CleanM can express complex data cleaning tasks,
                 optimize them in a unified way, and deploy them in a
                 scaleout fashion. We validate the applicability of
                 CleanM by using it on top of CleanDB, a newly designed
                 and implemented framework which can query heterogeneous
                 data. When compared to existing data cleaning
                 solutions, CleanDB (a) covers more data corruption
                 cases, (b) scales better, and can handle cases for
                 which its competitors are unable to terminate, and (c)
                 uses a single interface for querying and for data
                 cleaning.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xie:2017:DTS,
  author =       "Dong Xie and Feifei Li and Jeff M. Phillips",
  title =        "Distributed trajectory similarity search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1478--1489",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Mobile and sensing devices have already become
                 ubiquitous. They have made tracking moving objects an
                 easy task. As a result, mobile applications like Uber
                 and many IoT projects have generated massive amounts of
                 trajectory data that can no longer be processed by a
                 single machine efficiently. Among the typical query
                 operations over trajectories, similarity search is a
                 common yet expensive operator in querying trajectory
                 data. It is useful for applications in different
                 domains such as traffic and transportation
                 optimizations, weather forecast and modeling, and
                 sports analytics. It is also a fundamental operator for
                 many important mining operations such as clustering and
                 classification of trajectories. In this paper, we
                 propose a distributed query framework to process
                 trajectory similarity search over a large set of
                 trajectories. We have implemented the proposed
                 framework in Spark, a popular distributed data
                 processing engine, by carefully considering different
                 design choices. Our query framework supports both the
                  Hausdorff distance and the Fr{\'e}chet distance. Extensive
                 experiments have demonstrated the excellent scalability
                 and query efficiency achieved by our design, compared
                 to other methods and design alternatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandra:2017:ROJ,
  author =       "Bikash Chandra and S. Sudarshan",
  title =        "Runtime optimization of join location in parallel data
                 management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1490--1501",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Applications running on parallel systems often need to
                 join a streaming relation or a stored relation with
                 data indexed in a parallel data storage system. Some
                 applications also compute UDFs on the joined tuples.
                 The join can be done at the data storage nodes,
                 corresponding to reduce side joins, or by fetching data
                 from the storage system to compute nodes, corresponding
                 to map side join. Both may be suboptimal: reduce side
                 joins may cause skew, while map side joins may lead to
                 a lot of data being transferred and replicated. In this
                 paper, we present techniques to make runtime decisions
                 between the two options on a per key basis, in order to
                 improve the throughput of the join, accounting for UDF
                 computation if any. Our techniques are based on an
                 extended ski-rental algorithm and provide worst-case
                 performance guarantees with respect to the optimal
                 point in the space considered by us. Our techniques use
                 load balancing taking into account the CPU, network and
                 I/O costs as well as the load on compute and storage
                 nodes. We have implemented our techniques on Hadoop,
                 Spark and the Muppet stream processing engine. Our
                 experiments show that our optimization techniques
                 provide a significant improvement in throughput over
                 existing techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lehmberg:2017:SWT,
  author =       "Oliver Lehmberg and Christian Bizer",
  title =        "Stitching web tables for improving matching quality",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1502--1513",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "HTML tables on web pages (``web tables'') cover a wide
                 variety of topics. Data from web tables can thus be
                 useful for tasks such as knowledge base completion or
                 ad hoc table extension. Before table data can be used
                 for these tasks, the tables must be matched to the
                 respective knowledge base or base table. The challenges
                 of web table matching are the high heterogeneity and
                 the small size of the tables. Though it is known that
                 the majority of web tables are very small, the gold
                 standards that are used to compare web table matching
                 systems mostly consist of larger tables. In this
                 experimental paper, we evaluate T2K Match, a web table
                 to knowledge base matching system, and COMA, a standard
                 schema matching tool, using a sample of web tables that
                 is more realistic than the gold standards that were
                 previously used. We find that both systems fail to
                 produce correct results for many of the very small
                 tables in the sample. As a remedy, we propose to stitch
                 (combine) the tables from each web site into larger
                 ones and match these enlarged tables to the knowledge
                 base or base table afterwards. For this stitching
                 process, we evaluate different schema matching methods
                 in combination with holistic correspondence refinement.
                 Limiting the stitching procedure to web tables from the
                 same web site decreases the heterogeneity and allows us
                 to stitch tables with very high precision. Our
                 experiments show that applying table stitching before
                 running the actual matching method improves the
                 matching results by 0.38 in F1-measure for T2K Match
                 and by 0.14 for COMA. Also, stitching the tables allows
                 us to reduce the amount of tables in our corpus from 5
                 million original web tables to as few as 100,000
                 stitched tables.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shekelyan:2017:DHB,
  author =       "Michael Shekelyan and Anton Dign{\"o}s and Johann
                 Gamper",
  title =        "{DigitHist}: a histogram-based data summary with tight
                 error bounds",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1514--1525",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose DigitHist, a histogram summary for
                 selectivity estimation on multi-dimensional data with
                 tight error bounds. By combining multi-dimensional and
                 one-dimensional histograms along regular grids of
                 different resolutions, DigitHist provides an accurate
                 and reliable histogram approach for multi-dimensional
                 data. To achieve a compact summary, we use a sparse
                 representation combined with a novel histogram
                 compression technique that chooses a higher resolution
                 in dense regions and a lower resolution elsewhere. For
                 the construction of DigitHist, we propose a new error
                  measure, termed $u$-error, which minimizes the width
                 between the guaranteed upper and lower bounds of the
                 selectivity estimate. The construction algorithm
                 performs a single data scan and has linear time
                 complexity. An in-depth experimental evaluation shows
                 that DigitHist delivers superior precision and error
                 bounds than state-of-the-art competitors at a
                 comparable query time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pilman:2017:FSK,
  author =       "Markus Pilman and Kevin Bocksrocker and Lucas Braun
                 and Renato Marroqu{\'\i}n and Donald Kossmann",
  title =        "Fast scans on key--value stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1526--1537",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Key-Value Stores (KVS) are becoming increasingly
                 popular because they scale up and down elastically,
                 sustain high throughputs for get/put workloads and have
                 low latencies. KVS owe these advantages to their
                 simplicity. This simplicity, however, comes at a cost:
                 It is expensive to process complex, analytical queries
                 on top of a KVS because today's generation of KVS does
                 not support an efficient way to scan the data. The
                 problem is that there are conflicting goals when
                 designing a KVS for analytical queries and for simple
                 get/put workloads: Analytical queries require high
                 locality and a compact representation of data whereas
                 elastic get/put workloads require sparse indexes. This
                 paper shows that it is possible to have it all, with
                 reasonable compromises. We studied the KVS design space
                 and built TellStore, a distributed KVS, that performs
                 almost as well as state-of-the-art KVS for get/put
                 workloads and orders of magnitude better for analytical
                 and mixed workloads. This paper presents the results of
                 comprehensive experiments with an extended version of
                 the YCSB benchmark and a workload from the
                 telecommunication industry.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2017:FMC,
  author =       "Can Lu and Jeffrey Xu Yu and Hao Wei and Yikai Zhang",
  title =        "Finding the maximum clique in massive graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1538--1549",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Cliques refer to subgraphs in an undirected graph such
                 that vertices in each subgraph are pairwise adjacent.
                 The maximum clique problem, to find the clique with
                 most vertices in a given graph, has been extensively
                 studied. Besides its theoretical value as an NP-hard
                 problem, the maximum clique problem is known to have
                 direct applications in various fields, such as
                 community search in social networks and social media,
                 team formation in expert networks, gene expression and
                 motif discovery in bioinformatics and anomaly detection
                 in complex networks, revealing the structure and
                 function of networks. However, algorithms designed for
                 the maximum clique problem are expensive to deal with
                 real-world networks. In this paper, we devise a
                 randomized algorithm for the maximum clique problem.
                 Different from previous algorithms that search from
                 each vertex one after another, our approach RMC, for
                 the randomized maximum clique problem, employs a binary
                 search while maintaining a lower bound $ \omega_c $ and
                 an upper bound [EQUATION] of $ \omega (G) $. In each
                  iteration, RMC attempts to find a $ \omega_t $-clique
                 where [EQUATION]. As finding $ \omega_t $ in each
                 iteration is NP-complete, we extract a seed set S such
                 that the problem of finding a $ \omega_t$-clique in G
                 is equivalent to finding a $ \omega_t$-clique in S with
                 probability guarantees $ (\geq 1 - n^{-c})$. We propose
                 a novel iterative algorithm to determine the maximum
                 clique by searching a $k$-clique in $S$ starting from $
                 k = \omega_c + 1$ until $S$ becomes [EQUATION], when
                 more iterations benefit marginally. As confirmed by the
                 experiments, our approach is much more efficient and
                 robust than previous solutions and can always find the
                 exact maximum clique.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2017:PPN,
  author =       "Yuankai Zhang and Adam O'Neill and Micah Sherr and
                 Wenchao Zhou",
  title =        "Privacy-preserving network provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1550--1561",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Network accountability, forensic analysis, and failure
                 diagnosis are becoming increasingly important for
                 network management and security. Network provenance
                 significantly aids network administrators in these
                 tasks by explaining system behavior and revealing the
                 dependencies between system states. Although
                 resourceful, network provenance can sometimes be too
                 rich, revealing potentially sensitive information that
                 was involved in system execution. In this paper, we
                 propose a cryptographic approach to preserve the
                 confidentiality of provenance (sub)graphs while
                 allowing users to query and access the parts of the
                 graph for which they are authorized. Our proposed
                 solution is a novel application of searchable symmetric
                 encryption (SSE) and more generally structured
                 encryption (SE). Our SE-enabled provenance system
                 allows a node to enforce access control policies over
                 its provenance data even after the data has been
                 shipped to remote nodes (e.g., for optimization
                 purposes). We present a prototype of our design and
                 demonstrate its practicality, scalability, and
                 efficiency for both provenance maintenance and
                 querying.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Garcia-Ulloa:2017:TDS,
  author =       "Daniel A. Garcia-Ulloa and Li Xiong and Vaidy
                 Sunderam",
  title =        "Truth discovery for spatio-temporal events from
                 crowdsourced data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1562--1573",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "One of the greatest challenges in spatial
                 crowdsourcing is determining the veracity of reports
                 from multiple users about a particular event or
                 phenomenon. In this paper, we address the difficulties
                 of truth discovery in spatio-temporal tasks and present
                 a new method based on recursive Bayesian estimation
                 (BE) from multiple reports of users. Our method
                 incorporates a reliability model for users, which
                 improves as more reports arrive while increasing the
                 accuracy of the model in labeling the state of the
                 event. The model is further improved by Kalman
                 estimation (BE+KE) that models the spatio-temporal
                 correlations of the events and predicts the next state
                 of an event and is corrected when new reports arrive.
                 The methods are tested in a simulated environment, as
                 well as using real-world data. Experimental results
                 show that our methods are adaptable to the available
                 data, can incorporate previous beliefs, and outperform
                 existing truth discovery methods of spatio-temporal
                 events.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Trummer:2017:DVO,
  author =       "Immanuel Trummer and Jiancheng Zhu and Mark Bryan",
  title =        "Data vocalization: optimizing voice output of
                 relational data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1574--1585",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Research on data visualization aims at finding the
                 best way to present data via visual interfaces. We
                 introduce the complementary problem of ``data
                 vocalization''. Our goal is to present relational data
                 in the most efficient way via voice output. This
                 problem setting is motivated by emerging tools and
                 devices (e.g., Google Home, Amazon Echo, Apple's Siri,
                 or voice-based SQL interfaces) that communicate data
                 primarily via audio output to their users. We treat
                 voice output generation as an optimization problem. The
                 goal is to minimize speaking time while transmitting an
                 approximation of a relational table to the user. We
                 consider constraints on the precision of the
                 transmitted data as well as on the cognitive load
                 placed on the listener. We formalize voice output
                 optimization and show that it is NP-hard. We present
                 three approaches to solve that problem. First, we show
                 how the problem can be translated into an integer
                 linear program which enables us to apply corresponding
                 solvers. Second, we present a two-phase approach that
                 forms groups of similar rows in a pre-processing step,
                 using a variant of the apriori algorithm. Then, we
                 select an optimal combination of groups to generate a
                 speech. Finally, we present a greedy algorithm that
                 runs in polynomial time. Under simplifying assumptions,
                 we prove that it generates near-optimal output by
                 leveraging the sub-modularity property of our cost
                 function. We compare our algorithms experimentally and
                 analyze their complexity.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kang:2017:NON,
  author =       "Daniel Kang and John Emmons and Firas Abuzaid and
                 Peter Bailis and Matei Zaharia",
  title =        "{NoScope}: optimizing neural network queries over
                 video at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "11",
  pages =        "1586--1597",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Sep 5 16:07:00 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent advances in computer vision---in the form of
                 deep neural networks---have made it possible to query
                 increasing volumes of video data with high accuracy.
                 However, neural network inference is computationally
                 expensive at scale: applying a state-of-the-art object
                 detector in real time (i.e., 30+ frames per second) to
                 a single video requires a \$4000 GPU. In response, we
                 present NoScope, a system for querying videos that can
                 reduce the cost of neural network video analysis by up
                 to three orders of magnitude via inference-optimized
                 model search. Given a target video, object to detect,
                 and reference neural network, NoScope automatically
                 searches for and trains a sequence, or cascade, of
                 models that preserves the accuracy of the reference
                 network but is specialized to the target video and are
                 therefore far less computationally expensive. NoScope
                 cascades two types of models: specialized models that
                 forego the full generality of the reference model but
                 faithfully mimic its behavior for the target video and
                 object; and difference detectors that highlight
                 temporal differences across frames. We show that the
                 optimal cascade architecture differs across videos and
                 objects, so NoScope uses an efficient cost-based
                 optimizer to search across models and cascades. With
                 this approach, NoScope achieves two to three orders of
                 magnitude speed-ups (265--15,500$ \times $ real-time)
                 on binary classification tasks over fixed-angle webcam
                 and surveillance video while maintaining accuracy
                 within 1--5\% of state-of-the-art neural networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2017:PRA,
  author =       "Juchang Lee and SeungHyun Moon and Kyu Hwan Kim and
                 Deok Hoe Kim and Sang Kyun Cha and Wook-Shin Han",
  title =        "Parallel replication across formats in {SAP HANA} for
                 scaling out mixed {OLTP\slash OLAP} workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1598--1609",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137767",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern in-memory database systems are facing the need
                 of efficiently supporting mixed workloads of OLTP and
                 OLAP. A conventional approach to this requirement is to
                 rely on ETL-style, application-driven data replication
                 between two very different OLTP and OLAP systems,
                 sacrificing real-time reporting on operational data. An
                 alternative approach is to run OLTP and OLAP workloads
                 in a single machine, which eventually limits the
                 maximum scalability of OLAP query performance. In order
                 to tackle this challenging problem, we propose a novel
                 database replication architecture called Asynchronous
                 Parallel Table Replication (ATR). ATR supports OLTP
                 workloads in one primary machine, while it supports
                 heavy OLAP workloads in replicas. Here, row-store
                 formats can be used for OLTP transactions at the
                 primary, while column-store formats are used for OLAP
                 analytical queries at the replicas. ATR is designed to
                 support elastic scalability of OLAP query performance
                 while it minimizes the overhead for transaction
                 processing at the primary and minimizes CPU consumption
                 for replayed transactions at the replicas. ATR employs
                 a novel optimistic lock-free parallel log replay scheme
                 which exploits characteristics of multi-version
                 concurrency control (MVCC) in order to enable real-time
                 reporting by minimizing the propagation delay between
                 the primary and replicas. Through extensive experiments
                 with a concrete implementation available in a
                 commercial database system, we demonstrate that ATR
                 achieves sub-second visibility delay even for
                 update-intensive workloads, providing scalable OLAP
                 performance without notable overhead to the primary.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shamsuddin:2017:DLD,
  author =       "Rittika Shamsuddin and Amit Sawant and Balakrishnan
                 Prabhakaran",
  title =        "Developing a low dimensional patient class profile in
                 accordance to their respiration-induced tumor motion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1610--1621",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137768",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tumor location displacement caused by
                 respiration-induced motion reduces the efficacy of
                 radiation therapy. Three medically relevant patterns
                 are often observed in the respiration-induced motion
                 signal: baseline shift, ES-Range shift, and D-Range
                 shift. In this paper, for patients with lower body
                 cancer, we develop class profiles (a low dimensional
                 pattern frequency structure) that characterize them in
                 terms of these three medically relevant patterns. We
                 propose an adaptive segmentation technique that turns
                 each respiration-induced motion signal into a multi-set
                 of segments based on persistent variations within the
                 signal. These multi-sets of segments are then probed for
                 base behaviors. These base behaviors are then used to
                 develop the group/class profiles using a modified
                 version of the clustering technique described in [1].
                 Finally, via quantitative analysis, we provide a
                 medical characterization for the class profiles, which
                 can be used to explore breathing intervention
                 technique. We show that, with (i) carefully designed
                 feature sets, (ii) the proposed adaptive segmentation
                 technique, (iii) the reasonable modifications to an
                 existing clustering algorithm for multi-sets, and (iv)
                 the proposed medical characterization methodology, it
                 is possible to reduce the time series
                 respiration-induced motion signals into a compact class
                 profile. One of our co-authors is a medical physician
                 and we used his expert opinion to verify the results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ziauddin:2017:DBD,
  author =       "Mohamed Ziauddin and Andrew Witkowski and You Jung Kim
                 and Dmitry Potapov and Janaki Lahorani and Murali
                 Krishna",
  title =        "Dimensions based data clustering and zone maps",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1622--1633",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137769",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, the data warehouse industry has
                 witnessed decreased use of indexing but increased use
                 of compression and clustering of data facilitating
                 efficient data access and data pruning in the query
                 processing area. A classic example of data pruning is
                 the partition pruning, which is used when table data is
                 range or list partitioned. But lately, techniques have
                 been developed to prune data at a lower granularity
                 than a table partition or sub-partition. A good example
                 is the use of data pruning structure called zone map. A
                 zone map prunes zones of data from a table on which it
                 is defined. Data pruning via zone map is very effective
                 when the table data is clustered by the filtering
                 columns. The database industry has offered support to
                 cluster data in tables by its local columns, and to
                 define zone maps on clustering columns of such tables.
                 This has helped improve the performance of queries that
                 contain filter predicates on local columns. However,
                 queries in data warehouses are typically based on
                 star/snowflake schema with filter predicates usually on
                 columns of the dimension tables joined to a fact table.
                 Given this, the performance of data warehouse queries
                 can be significantly improved if the fact table data is
                 clustered by columns of dimension tables together with
                 zone maps that maintain min/max value ranges of these
                 clustering columns over zones of fact table data. In
                 recognition of this opportunity of significantly
                 improving the performance of data warehouse queries,
                 Oracle 12c release 1 has introduced the support for
                 dimension based clustering of fact tables together with
                 data pruning of the fact tables via dimension based
                 zone maps.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Noghabi:2017:SSS,
  author =       "Shadi A. Noghabi and Kartik Paramasivam and Yi Pan and
                 Navina Ramesh and Jon Bringhurst and Indranil Gupta and
                 Roy H. Campbell",
  title =        "{Samza}: stateful scalable stream processing at
                 {LinkedIn}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1634--1645",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137770",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed stream processing systems need to support
                 stateful processing, recover quickly from failures to
                 resume such processing, and reprocess an entire data
                 stream quickly. We present Apache Samza, a distributed
                 system for stateful and fault-tolerant stream
                 processing. Samza utilizes a partitioned local state
                 along with a low-overhead background changelog
                 mechanism, allowing it to scale to massive state sizes
                 (hundreds of TB) per application. Recovery from
                 failures is sped up by re-scheduling based on Host
                 Affinity. In addition to processing infinite streams of
                 events, Samza supports processing a finite dataset as a
                 stream, from either a streaming source (e.g., Kafka), a
                 database snapshot (e.g., Databus), or a file system
                 (e.g. HDFS), without having to change the application
                 code (unlike the popular Lambda-based architectures
                 which necessitate maintenance of separate code bases
                 for batch and stream path processing). Samza is
                 currently in use at LinkedIn by hundreds of production
                 applications with more than 10,000 containers. Samza
                 is an open-source Apache project adopted by many
                 top-tier companies (e.g., LinkedIn, Uber, Netflix,
                 TripAdvisor, etc.). Our experiments show that Samza:
                 (a) handles state efficiently, improving latency and
                 throughput by more than 100X compared to using a remote
                 storage; (b) provides recovery time independent of
                 state size; (c) scales performance linearly with number
                 of containers; and (d) supports reprocessing of the
                 data stream quickly and with minimal interference on
                 real-time traffic.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Falk:2017:QAK,
  author =       "Eric Falk and Vijay K. Gurbani and Radu State",
  title =        "Query-able {Kafka}: an agile data analytics pipeline
                 for mobile wireless networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1646--1657",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137771",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Due to their promise of delivering real-time network
                 insights, today's streaming analytics platforms are
                 increasingly being used in the communications networks
                 where the impact of the insights go beyond sentiment
                 and trend analysis to include real-time detection of
                 security attacks and prediction of network state (i.e.,
                 is the network transitioning towards an outage).
                 Current streaming analytics platforms operate under the
                 assumption that arriving traffic is to the order of
                 kilobytes produced at very high frequencies. However,
                 communications networks, especially the
                 telecommunication networks, challenge this assumption
                 because some of the arriving traffic in these networks
                 is to the order of gigabytes, but produced at medium to
                 low velocities. Furthermore, these large datasets may
                 need to be ingested in their entirety to render network
                 insights in real-time. Our interest is to subject
                 today's streaming analytics platforms --- constructed
                 from state-of-the art software components (Kafka,
                 Spark, HDFS, ElasticSearch) --- to traffic densities
                 observed in such communications networks. We find that
                 filtering on such large datasets is best done in a
                 common upstream point instead of being pushed to, and
                 repeated, in downstream components. To demonstrate the
                 advantages of such an approach, we modify Apache Kafka
                 to perform limited native data transformation and
                 filtering, relieving the downstream Spark application
                 from doing this. Our approach outperforms four
                 prevalent analytics pipeline architectures with
                 negligible overhead compared to standard Kafka. (Our
                 modifications to Apache Kafka are publicly available at
                 https://github.com/Esquive/queryable-kafka.git)",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nica:2017:SDS,
  author =       "Anisoara Nica and Reza Sherkat and Mihnea Andrei and
                 Xun Cheng and Martin Heidel and Christian Bensberg and
                 Heiko Gerwens",
  title =        "{Statisticum}: data statistics management in {SAP
                 HANA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1658--1669",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137772",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce a new concept of leveraging traditional
                 data statistics as dynamic data integrity constraints.
                 These data statistics produce transient database
                 constraints, which are valid as long as they can be
                 proven to be consistent with the current data. We
                 denote this type of data statistics by constraint data
                 statistics, their properties needed for consistency
                 checking by consistency metadata, and their implied
                 integrity constraints by implied data statistics
                 constraints (implied constraints for short). Implied
                 constraints are valid integrity constraints which are
                 powerful query optimization tools employed, just as
                 traditional database constraints, in semantic query
                 transformation (aka query reformulation), partition
                 pruning, runtime optimization, and semi-join reduction,
                 to name a few. To our knowledge, this is the first work
                 introducing this novel and powerful concept of deriving
                 implied integrity constraints from data statistics. We
                 discuss theoretical aspects of the constraint data
                 statistics concept and their integration into query
                 processing. We present the current architecture of data
                 statistics management in SAP HANA and detail how
                 constraint data statistics are designed and integrated
                 into this architecture. As an instantiation of this
                 framework, we consider dynamic partition pruning for
                 data aging scenarios. We discuss our current
                 implementation for constraint data statistics objects
                 in SAP HANA which can be used for dynamic partition
                 pruning. We enumerate their properties and show how
                 consistency checking for implied integrity constraints
                 is supported in the data statistics architecture. Our
                 experimental evaluations on the TPC-H benchmark and a
                 real customer application confirm the effectiveness of
                 the implied integrity constraints; (1) for 59\% of
                 TPC-H queries, constraint data statistics utilization
                 results in pruning cold partitions and reducing memory
                 consumption, and (2) we observe up to 3 orders of
                 magnitude speed-up in query processing time, for a real
                 customer running an S/4HANA application.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gessert:2017:QQW,
  author =       "Felix Gessert and Michael Schaarschmidt and Wolfram
                 Wingerath and Erik Witt and Eiko Yoneki and Norbert
                 Ritter",
  title =        "{Quaestor}: query web caching for
                 database-as-a-service providers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1670--1681",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137773",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today, web performance is primarily governed by
                 round-trip latencies between end devices and cloud
                 services. To improve performance, services need to
                 minimize the delay of accessing data. In this paper, we
                 propose a novel approach to low latency that relies on
                 existing content delivery and web caching
                 infrastructure. The main idea is to enable
                 application-independent caching of query results and
                 records with tunable consistency guarantees, in
                 particular bounded staleness. Quaestor (Query Store)
                 employs two key concepts to incorporate both
                 expiration-based and invalidation-based web caches: (1)
                 an Expiring Bloom Filter data structure to indicate
                 potentially stale data, and (2) statistically derived
                 cache expiration times to maximize cache hit rates.
                 Through a distributed query invalidation pipeline,
                 changes to cached query results are detected in
                 real-time. The proposed caching algorithms offer a new
                 means for data-centric cloud services to trade latency
                 against staleness bounds, e.g. in a
                 database-as-a-service. Quaestor is the core technology
                 of the backend-as-a-service platform Baqend, a cloud
                 service for low-latency websites. We provide empirical
                 evidence for Quaestor's scalability and performance
                 through both simulation and experiments. The results
                 indicate that for read-heavy workloads, up to tenfold
                 speed-ups can be achieved through Quaestor's caching.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gasiunas:2017:FBA,
  author =       "Vaidas Gasiunas and David Dominguez-Sal and Ralph
                 Acker and Aharon Avitzur and Ilan Bronshtein and Rushan
                 Chen and Eli Ginot and Norbert Martinez-Bazan and
                 Michael M{\"u}ller and Alexander Nozdrin and Weijie Ou
                 and Nir Pachter and Dima Sivov and Eliezer Levy",
  title =        "Fiber-based architecture for {NFV} cloud databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1682--1693",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137774",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The telco industry is gradually shifting from using
                 monolithic software packages deployed on custom
                 hardware to using modular virtualized software
                 functions deployed on cloudified data centers using
                 commodity hardware. This transformation is referred to
                 as Network Function Virtualization (NFV). The
                 scalability of the databases (DBs) underlying the
                 virtual network functions is the cornerstone for
                 reaping the benefits from the NFV transformation. This
                 paper presents an industrial experience of applying
                 shared-nothing techniques in order to achieve the
                 scalability of a DB in an NFV setup. The special
                 combination of requirements in NFV DBs are not easily
                 met with conventional execution models. Therefore, we
                 designed a special shared-nothing architecture that is
                 based on cooperative multi-tasking using user-level
                 threads (fibers). We further show that the fiber-based
                 approach outperforms the approach built using
                 conventional multi-threading and meets the variable
                 deployment needs of the NFV transformation.
                 Furthermore, fibers yield a simpler-to-maintain
                 software and enable controlling a trade-off between
                 long-duration computations and real-time requests.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bose:2017:PDF,
  author =       "Joos-Hendrik B{\"o}se and Valentin Flunkert and Jan
                 Gasthaus and Tim Januschowski and Dustin Lange and
                 David Salinas and Sebastian Schelter and Matthias
                 Seeger and Yuyang Wang",
  title =        "Probabilistic demand forecasting at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1694--1705",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137775",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a platform built on large-scale,
                 data-centric machine learning (ML) approaches, whose
                 particular focus is demand forecasting in retail. At
                 its core, this platform enables the training and
                 application of probabilistic demand forecasting models,
                 and provides convenient abstractions and support
                 functionality for forecasting problems. The platform
                 comprises of a complex end-to-end machine learning
                 system built on Apache Spark, which includes data
                 preprocessing, feature engineering, distributed
                 learning, as well as evaluation, experimentation and
                 ensembling. Furthermore, it meets the demands of a
                 production system and scales to large catalogues
                 containing millions of items. We describe the
                 challenges of building such a platform and discuss our
                 design decisions. We detail aspects on several levels
                 of the system, such as a set of general distributed
                 learning schemes, our machinery for ensembling
                 predictions, and a high-level dataflow abstraction for
                 modeling complex ML pipelines. To the best of our
                 knowledge, we are not aware of prior work on real-world
                 demand forecasting systems which rivals our approach in
                 terms of scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lee:2017:EBG,
  author =       "Jinho Lee and Heesu Kim and Sungjoo Yoo and Kiyoung
                 Choi and H. Peter Hofstee and Gi-Joon Nam and Mark R.
                 Nutter and Damir Jamsek",
  title =        "{ExtraV}: boosting graph processing near storage with
                 a coherent accelerator",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1706--1717",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137776",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we propose ExtraV, a framework for
                 near-storage graph processing. It is based on the novel
                 concept of graph virtualization, which efficiently
                 utilizes a cache-coherent hardware accelerator at the
                 storage side to achieve performance and flexibility at
                 the same time. ExtraV consists of four main components:
                 (1) host processor, (2) main memory, (3) AFU
                 (Accelerator Function Unit) and (4) storage. The AFU, a
                 hardware accelerator, sits between the host processor
                 and storage. Using a coherent interface that allows
                 main memory accesses, it performs graph traversal
                 functions that are common to various algorithms while
                 the program running on the host processor (called the
                 host program) manages the overall execution along with
                 more application-specific tasks. Graph virtualization
                 is a high-level programming model of graph processing
                 that allows designers to focus on algorithm-specific
                 functions. Realized by the accelerator, graph
                 virtualization gives the host programs an illusion that
                 the graph data reside on the main memory in a layout
                 that fits with the memory access behavior of host
                 programs even though the graph data are actually stored
                 in a multi-level, compressed form in storage. We
                 prototyped ExtraV on a Power8 machine with a
                 CAPI-enabled FPGA. Our experiments on a real system
                 prototype offer significant speedup compared to
                 state-of-the-art software only implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Carbone:2017:SMA,
  author =       "Paris Carbone and Stephan Ewen and Gyula F{\'o}ra and
                 Seif Haridi and Stefan Richter and Kostas Tzoumas",
  title =        "State management in {Apache Flink\reg}: consistent
                 stateful distributed stream processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1718--1729",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137777",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream processors are emerging in industry as an
                 apparatus that drives analytical but also mission
                 critical services handling the core of persistent
                 application logic. Thus, apart from scalability and
                 low-latency, a rising system need is first-class
                 support for application state together with strong
                 consistency guarantees, and adaptivity to cluster
                 reconfigurations, software patches and partial
                 failures. Although prior systems research has addressed
                 some of these specific problems, the practical
                 challenge lies on how such guarantees can be
                 materialized in a transparent, non-intrusive manner
                 that relieves the user from unnecessary constraints.
                 Such needs served as the main design principles of
                 state management in Apache Flink, an open source,
                 scalable stream processor. We present Flink's core
                 pipelined, in-flight mechanism which guarantees the
                 creation of lightweight, consistent, distributed
                 snapshots of application state, progressively, without
                 impacting continuous execution. Consistent snapshots
                 cover all needs for system reconfiguration, fault
                 tolerance and version management through coarse grained
                 rollback recovery. Application state is declared
                 explicitly to the system, allowing efficient
                 partitioning and transparent commits to persistent
                 storage. We further present Flink's backend
                 implementations and mechanisms for high availability,
                 external state queries and output commit. Finally, we
                 demonstrate how these mechanisms behave in practice
                 with metrics and large-deployment insights exhibiting
                 the low performance trade-offs of our approach and the
                 general benefits of exploiting asynchrony in
                 continuous, yet sustainable system deployments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zheng:2017:PHA,
  author =       "Jianjun Zheng and Qian Lin and Jiatao Xu and Cheng Wei
                 and Chuwei Zeng and Pingan Yang and Yunfan Zhang",
  title =        "{PaxosStore}: high-availability storage made practical
                 in {WeChat}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1730--1741",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137778",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we present PaxosStore, a
                 high-availability storage system developed to support
                 the comprehensive business of WeChat. It employs a
                 combinational design in the storage layer to engage
                 multiple storage engines constructed for different
                 storage models. PaxosStore is characteristic of
                 extracting the Paxos-based distributed consensus
                 protocol as a middleware that is universally accessible
                 to the underlying multi-model storage engines. This
                 facilitates tuning, maintaining, scaling and extending
                 the storage engines. According to our experience in
                 engineering practice, to achieve a practical consistent
                 read/write protocol is far more complex than its
                 theory. To tackle such engineering complexity, we
                 propose a layered design of the Paxos-based storage
                 protocol stack, where PaxosLog, the key data structure
                 used in the protocol, is devised to bridge the
                 programming-oriented consistent read/write to the
                 storage-oriented Paxos procedure. Additionally, we
                 present optimizations based on Paxos that made
                 fault-tolerance more efficient. Discussion throughout
                 the paper primarily focuses on pragmatic solutions that
                 could be insightful for building practical distributed
                 storage systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Antonopoulos:2017:ROI,
  author =       "Panagiotis Antonopoulos and Hanuma Kodavalla and Alex
                 Tran and Nitish Upreti and Chaitali Shah and Mirek
                 Sztajno",
  title =        "Resumable online index rebuild in {SQL} server",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1742--1753",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137779",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Azure SQL Database and the upcoming release of SQL
                 Server enhance Online Index Rebuild to provide
                 fault-tolerance and allow index rebuild operations to
                 resume after a system failure or a user-initiated
                 pause. SQL Server is the first commercial DBMS to
                 support pause and resume functionality for index
                 rebuilds. This is achieved by splitting the operation
                 into incremental units of work and persisting the
                 required state so that it can be resumed later with
                 minimal loss of progress. At the same time, the
                 proposed technology minimizes the log space required
                 for the operation to succeed, making it possible to
                 rebuild large indexes using only a small, constant
                 amount of log space. These capabilities are critical to
                 guarantee the reliability of these operations in an
                 environment where (a) the database sizes are increasing
                 at a much faster pace compared to the available
                 hardware, (b) system failures are frequent in Cloud
                 architectures using commodity hardware, (c) software
                 upgrades and other maintenance tasks are automatically
                 handled by the Cloud platforms, introducing further
                 unexpected failures for the users and (d) most modern
                 applications need to be available 24/7 and have very
                 tight maintenance windows. This paper describes the
                 design of ``Resumable Online Index Rebuild'' and
                 discusses how this technology can be extended to cover
                 more schema management operations in the future.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Andrei:2017:SHA,
  author =       "Mihnea Andrei and Christian Lemke and G{\"u}nter
                 Radestock and Robert Schulze and Carsten Thiel and
                 Rolando Blanco and Akanksha Meghlan and Muhammad
                 Sharique and Sebastian Seifert and Surendra Vishnoi and
                 Daniel Booss and Thomas Peh and Ivan Schreter and
                 Werner Thesing and Mehul Wagle and Thomas Willhalm",
  title =        "{SAP HANA} adoption of non-volatile memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1754--1765",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137780",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Non-Volatile RAM (NVRAM) is a novel class of hardware
                 technology which is an interesting blend of two storage
                 paradigms: byte-addressable DRAM and block-addressable
                 storage (e.g. HDD/SSD). Most of the existing enterprise
                 relational data management systems such as SAP HANA
                 have their internal architecture based on the inherent
                 assumption that memory is volatile and base their
                 persistence on explicit handling of block-oriented
                 storage devices. In this paper, we present the early
                 adoption of Non-Volatile Memory within the SAP HANA
                 Database, from the architectural and technical angles.
                 We discuss our architectural choices, dive deeper into
                 a few challenges of the NVRAM integration and their
                 solutions, and share our experimental results. As we
                 present our solutions for the NVRAM integration, we
                 also give, as a basis, a detailed description of the
                 relevant HANA internals.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2017:CIS,
  author =       "Mingming Zhang and Tianyu Wo and Tao Xie and Xuelian
                 Lin and Yaxiao Liu",
  title =        "{CarStream}: an industrial system of big data
                 processing for {Internet-of-Vehicles}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1766--1777",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137781",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As the Internet-of-Vehicles (IoV) technology becomes
                 an increasingly important trend for future
                 transportation, designing large-scale IoV systems has
                 become a critical task that aims to process big data
                 uploaded by fleet vehicles and to provide data-driven
                 services. The IoV data, especially high-frequency
                 vehicle statuses (e.g., location, engine parameters),
                 are characterized as large volume with a low density of
                 value and low data quality. Such characteristics pose
                 challenges for developing real-time applications based
                 on such data. In this paper, we address the challenges
                 in designing a scalable IoV system by describing
                 CarStream, an industrial system of big data processing
                 for chauffeured car services. Connected with over
                 30,000 vehicles, CarStream collects and processes
                 multiple types of driving data including vehicle
                 status, driver activity, and passenger-trip
                 information. Multiple services are provided based on
                 the collected data. CarStream has been deployed and
                 maintained for three years in industrial usage,
                 collecting over 40 terabytes of driving data. This
                 paper shares our experiences on designing CarStream
                 based on large-scale driving-data streams, and the
                 lessons learned from the process of addressing the
                 challenges in designing and maintaining CarStream.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bonetta:2017:FJF,
  author =       "Daniele Bonetta and Matthias Brantner",
  title =        "{FAD.js}: fast {JSON} data access using {JIT}-based
                 speculative optimizations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1778--1789",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137782",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "JSON is one of the most popular data encoding formats,
                 with wide adoption in Databases and BigData frameworks
                 as well as native support in popular programming
                 languages such as JavaScript/Node.js, Python, and R.
                 Nevertheless, JSON data processing can easily become a
                 performance bottleneck in data-intensive applications
                 because of parse and serialization overhead. In this
                 paper, we introduce Fad.js, a runtime system for
                 efficient processing of JSON objects in data-intensive
                 applications. Fad.js is based on (1) speculative
                 just-in-time (JIT) compilation and (2) selective access
                 to data. Experiments show that applications using
                 Fad.js achieve speedups up to 2.7x for encoding and
                 9.9x for decoding JSON data when compared to
                 state-of-the-art JSON processing libraries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aggour:2017:CCL,
  author =       "Kareem S. Aggour and Jenny Weisenberg Williams and
                 Justin McHugh and Vijay S. Kumar",
  title =        "{Colt}: concept lineage tool for data flow metadata
                 capture and analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1790--1801",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137783",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Most organizations are becoming increasingly
                 data-driven, often processing data from many different
                 sources to enable critical business operations. Beyond
                 the well-addressed challenge of storing and processing
                 large volumes of data, financial institutions in
                 particular are increasingly subject to federal
                 regulations requiring high levels of accountability for
                 the accuracy and lineage of this data. For companies
                 like GE Capital, which maintain data across a globally
                 interconnected network of thousands of systems, it is
                 becoming increasingly challenging to capture an
                 accurate understanding of the data flowing between
                 those systems. To address this problem, we designed and
                 developed a concept lineage tool allowing
                 organizational data flows to be modeled, visualized and
                 interactively explored. This tool has novel features
                 that allow a data flow network to be contextualized in
                 terms of business-specific metadata such as the
                 concept, business, and product for which it applies.
                 Key analysis features have been implemented, including
                 the ability to trace the origination of particular
                 datasets, and to discover all systems where data is
                 found that meets some user-defined criteria. This tool
                 has been readily adopted by users at GE Capital and in
                 a short time has already become a business-critical
                 application, with over 2,200 data systems and over
                 1,000 data flows captured.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yeh:2017:MPI,
  author =       "Chin-Chia Michael Yeh and Nickolas Kavantzas and
                 Eamonn Keogh",
  title =        "Matrix profile {IV}: using weakly labeled time series
                 to predict outcomes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1802--1812",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137784",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In academic settings over the last decade, there has
                 been significant progress in time series
                 classification. However, much of this work makes
                 assumptions that are simply unrealistic for deployed
                 industrial applications. Examples of these unrealistic
                 assumptions include the following: assuming that data
                 subsequences have a single fixed-length, are precisely
                 extracted from the data, and are correctly labeled
                 according to their membership in a set of equal-size
                 classes. In real-world industrial settings, these
                 patterns can be of different lengths, the class
                 annotations may only belong to a general region of the
                 data, may contain errors, and finally, the class
                 distribution is typically highly skewed. Can we learn
                 from such weakly labeled data? In this work, we
                 introduce SDTS, a scalable algorithm that can learn in
                 such challenging settings. We demonstrate the utility
                 of our ideas by learning from diverse datasets with
                 millions of datapoints. As we shall demonstrate, our
                 domain-agnostic parameter-free algorithm can be
                 competitive with domain-specific algorithms used in
                 neuroscience and entomology, even when those algorithms
                 have been tuned by domain experts to incorporate domain
                 knowledge.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chakkappen:2017:ASO,
  author =       "Sunil Chakkappen and Suratna Budalakoti and Ramarajan
                 Krishnamachari and Satyanarayana R. Valluri and Alan
                 Wood and Mohamed Zait",
  title =        "Adaptive statistics in {Oracle 12c}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1813--1824",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137785",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database Management Systems (DBMS) continue to be the
                 foundation of mission critical applications, both OLTP
                 and Analytics. They provide a safe, reliable and
                 efficient platform to store and retrieve data. SQL is
                 the lingua franca of the database world. A database
                 developer writes a SQL statement to specify data
                 sources and express the desired result and the DBMS
                 will figure out the most efficient way to implement it.
                 The query optimizer is the component in a DBMS
                 responsible for finding the best execution plan for a
                 given SQL statement based on statistics, access
                 structures, location, and format. At the center of a
                 query optimizer is a cost model that consumes the above
                 information and helps the optimizer make decisions
                 related to query transformations, join order, join
                 methods, access paths, and data movement. The final
                 execution plan produced by the query optimizer depends
                 on the quality of information used by the cost model,
                 as well as the sophistication of the cost model. In
                 addition to statistics about the data, the cost model
                 also relies on statistics generated internally for
                 intermediate results, e.g. size of the output of a join
                 operation. This paper presents the problems caused by
                 incorrect statistics of intermediate results, survey
                 the existing solutions and present our solution
                 introduced in Oracle 12c. The solution includes
                 validating the generated statistics using table data
                 and via the automatic creation of auxiliary statistics
                 structures. We limit the overhead of the additional
                 work by confining their use to cases where it matters
                 the most, caching the computed statistics, and using
                 table samples. The statistics management is automated.
                 We demonstrate the benefits of our approach based on
                 experiments using two SQL workloads, a benchmark that
                 uses data from the Internal Movie Data Base (IMDB) and
                 a real customer workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Floratou:2017:DSR,
  author =       "Avrilia Floratou and Ashvin Agrawal and Bill Graham
                 and Sriram Rao and Karthik Ramasamy",
  title =        "{Dhalion}: self-regulating stream processing in
                 {Heron}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1825--1836",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137786",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, there has been an explosion of
                 large-scale real-time analytics needs and a plethora of
                 streaming systems have been developed to support such
                 applications. These systems are able to continue stream
                 processing even when faced with hardware and software
                 failures. However, these systems do not address some
                 crucial challenges facing their operators: the manual,
                 time-consuming and error-prone tasks of tuning various
                 configuration knobs to achieve service level objectives
                 (SLO) as well as the maintenance of SLOs in the face of
                 sudden, unpredictable load variation and hardware or
                 software performance degradation. In this paper, we
                 introduce the notion of self-regulating streaming
                 systems and the key properties that they must satisfy.
                 We then present the design and evaluation of Dhalion, a
                 system that provides self-regulation capabilities to
                 underlying streaming systems. We describe our
                 implementation of the Dhalion framework on top of
                 Twitter Heron, as well as a number of policies that
                 automatically reconfigure Heron topologies to meet
                 throughput SLOs, scaling resource consumption up and
                 down as needed. We experimentally evaluate our Dhalion
                 policies in a cloud environment and demonstrate their
                 effectiveness. We are in the process of open-sourcing
                 our Dhalion policies as part of the Heron project.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2017:INO,
  author =       "Erkang Zhu and Ken Q. Pu and Fatemeh Nargesian and
                 Ren{\'e}e J. Miller",
  title =        "Interactive navigation of open data linkages",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1837--1840",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137788",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We developed Toronto Open Data Search to support the
                 ad hoc, interactive discovery of connections or
                 linkages between datasets. It can be used to
                 efficiently navigate through the open data cloud. Our
                 system consists of three parts: a user-interface
                 provided by a Web application; a scalable backend
                 infrastructure that supports navigational queries; and
                 a dynamic repository of open data tables. Our system
                 uses LSH Ensemble, an efficient index structure, to
                 compute linkages (attributes in two datasets with high
                 containment score) in real time at Internet scale. Our
                 application allows users to navigate along these
                 linkages by joining datasets. LSH Ensemble is scalable,
                 providing millisecond response times for linkage
                 discovery queries even over millions of datasets. Our
                 system offers users a highly interactive experience
                 making unrelated (and unlinked) dynamic collections of
                 datasets appear as a richly connected cloud of data
                 that can be navigated and combined easily in real
                 time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pimentel:2017:NTC,
  author =       "Jo{\~a}o Felipe Pimentel and Leonardo Murta and
                 Vanessa Braganholo and Juliana Freire",
  title =        "{noWorkflow}: a tool for collecting, analyzing, and
                 managing provenance from {Python} scripts",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1841--1844",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137789",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present noWorkflow, an open-source tool that
                 systematically and transparently collects provenance
                 from Python scripts, including data about the script
                 execution and how the script evolves over time. During
                 the demo, we will show how noWorkflow collects and
                 manages provenance, as well as how it supports the
                 analysis of computational experiments. We will also
                 encourage attendees to use noWorkflow for their own
                 scripts.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2017:ACB,
  author =       "Chao Wang and Yihao Feng and Qi Guo and Zhaoxian Li
                 and Kexin Liu and Zijian Tang and Anthony K. H. Tung
                 and Lifu Wu and Yuxin Zheng",
  title =        "{ARShop}: a cloud-based augmented reality system for
                 shopping",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1845--1848",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137790",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "ARShop is a one-stop solution for shopping in the
                 cyber-physical world with the help of crowd knowledge
                 and augmented reality. Its ultimate goal is to improve
                 customers' shopping experience. When a customer enters
                 a physical shop and snaps a shot, the enriched cyber
                 information of the surroundings will pop up and be
                 augmented on the screen. ARShop can also be the
                 customer's personal shopping assistant who can show
                 routes to the shops that the customer is interested in.
                 In addition, ARShop provides merchants with a web-based
                 interface to manage their shops and promote their
                 business to customers, and provides customers with an
                 Android App to query using images.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Aberger:2017:MGB,
  author =       "Christopher R. Aberger and Andrew Lamb and Kunle
                 Olukotun and Christopher R{\'e}",
  title =        "Mind the gap: bridging multi-domain query workloads
                 with {EmptyHeaded}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1849--1852",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137791",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Executing domain specific workloads from a relational
                 data warehouse is an increasingly popular task.
                 Unfortunately, classic relational database management
                 systems (RDBMS) are suboptimal in many domains (e.g.,
                 graph and linear algebra queries), and it is
                 challenging to transfer data from an RDBMS to a domain
                 specific toolkit in an efficient manner. This
                 demonstration showcases the EmptyHeaded engine: an
                 interactive query processing engine that leverages a
                 novel query architecture to support efficient execution
                 in multiple domains. To enable a unified design, the
                 EmptyHeaded architecture is built around recent
                 theoretical advancements in join processing and
                 automated in-query data transformations. This
                 demonstration highlights the strengths and weaknesses
                 of this novel type of query processing architecture
                 while showcasing its flexibility in multiple domains.
                 In particular, attendees will use EmptyHeaded's Jupyter
                 notebook front-end to interactively learn the
                 theoretical advantages of this new (and largely
                 unknown) approach and directly observe its performance
                 impact in multiple domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Maccioni:2017:CFL,
  author =       "Antonio Maccioni and Riccardo Torlone",
  title =        "Crossing the finish line faster when paddling the data
                 lake with {KAYAK}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1853--1856",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137792",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Paddling in a data lake is strenuous for a data
                 scientist. Being a loosely-structured collection of raw
                 data with little or no meta-information available, the
                 difficulties of extracting insights from a data lake
                 start from the initial phases of data analysis. Indeed,
                 data preparation, which involves many complex
                 operations (such as source and feature selection,
                 exploratory analysis, data profiling, and data
                 curation), is a long and involved activity for
                 navigating the lake before getting precious insights at
                 the finish line. In this framework, we demonstrate
                 KAYAK, a framework that supports data preparation in a
                 data lake with ad-hoc primitives and allows data
                 scientists to cross the finish line sooner. KAYAK takes
                 into account the tolerance of the user in waiting for
                 the primitives' results and it uses incremental
                 execution strategies to produce informative previews of
                 these results. The framework is based on a wise
                 management of metadata and on features that limit human
                 intervention, thus scaling smoothly when the data lake
                 evolves.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Niu:2017:DTT,
  author =       "Xing Niu and Bahareh Sadat Arab and Seokki Lee and Su
                 Feng and Xun Zou and Dieter Gawlick and Vasudha
                 Krishnaswamy and Zhen Hua Liu and Boris Glavic",
  title =        "Debugging transactions and tracking their provenance
                 with reenactment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1857--1860",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137793",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Debugging transactions and understanding their
                 execution are of immense importance for developing OLAP
                 applications, to trace causes of errors in production
                 systems, and to audit the operations of a database.
                 However, debugging transactions is hard for several
                 reasons: (1) after the execution of a transaction, its
                 input is no longer available for debugging, (2)
                 internal states of a transaction are typically not
                 accessible, and (3) the execution of a transaction may
                 be affected by concurrently running transactions. We
                 present a debugger for transactions that enables
                 non-invasive, postmortem debugging of transactions with
                 provenance tracking and supports what-if scenarios
                 (changes to transaction code or data). Using
                 reenactment, a declarative replay technique we have
                 developed, a transaction is replayed over the state of
                 the DB seen by its original execution including all its
                 interactions with concurrently executed transactions
                 from the history. Importantly, our approach uses the
                 temporal database and audit logging capabilities
                 available in many DBMS and does not require any
                 modifications to the underlying database system nor
                 transactional workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2017:PES,
  author =       "Kai Huang and Sourav S. Bhowmick and Shuigeng Zhou and
                 Byron Choi",
  title =        "{\tt picasso}: exploratory search of connected
                 subgraph substructures in graph databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1861--1864",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137794",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, exploratory search has received much
                 attention in information retrieval and database fields.
                 This search paradigm assists users who do not have a
                 clear search intent and are unfamiliar with the
                 underlying data space. Specifically, query formulation
                 evolves iteratively as the user becomes more familiar
                 with the content. Despite its growing importance,
                 exploratory search on graph-structured data has
                 received little attention in the literature. We
                 demonstrate a system called {\tt picasso} to realize
                 exploratory sub-structure search on a graph database
                 containing a set of small or medium-sized data graphs.
                 {\tt picasso} embodies several novel features such as
                 progressive (i.e., iterative) formulation of queries
                 visually and incremental processing, multi-stream
                 results exploration wall to visualize, explore, and
                 analyze search results to identify possible search
                 directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cai:2017:DDI,
  author =       "Ruichu Cai and Zijie Lu and Li Wang and Zhenjie Zhang
                 and Tom Z. J. Fu and Marianne Winslett",
  title =        "{DITIR}: distributed index for high throughput
                 trajectory insertion and real-time temporal range
                 query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1865--1868",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137795",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The prosperity of mobile social network and
                 location-based services, e.g., Uber, is backing the
                 explosive growth of spatial temporal streams on the
                 Internet. It raises new challenges to the underlying
                 data store system, which is supposed to support
                 extremely high-throughput trajectory insertion and
                 low-latency querying with spatial and temporal
                 constraints. State-of-the-art solutions, e.g., HBase,
                 do not render satisfactory performance, due to the high
                 overhead on index update. In this demonstration, we
                 present DITIR, our new system prototype tailored to
                 efficiently processing temporal and spatial queries
                 over historical data as well as latest updates. Our
                 system provides better performance guarantee, by
                 physically partitioning the incoming data tuples on
                 their arrivals and exploiting a template-based
                 insertion schema, to reach the desired ingestion
                 throughput. Load balancing mechanism is also introduced
                 to DITIR, by using which the system is capable of
                 achieving reliable performance against workload
                 dynamics. Our demonstration shows that DITIR supports
                 over 1 million tuple insertions in a second, when
                 running on a 10-node cluster. It also significantly
                 outperforms HBase by 7 times on ingestion throughput
                 and 5 times faster on query latency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pang:2017:FIV,
  author =       "Zhifei Pang and Sai Wu and Gang Chen and Ke Chen and
                 Lidan Shou",
  title =        "{FlashView}: an interactive visual explorer for raw
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1869--1872",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137796",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "New data has been generated in an unexpected high
                 speed. To get insight of those data, data analysts will
                 perform a thorough study using state-of-the-art big
                 data analytical tools. Before the analysis starts, a
                 preprocessing is conducted, where data analyst tends to
                 issue a few ad-hoc queries on a new dataset to explore
                 and gain a better understanding. However, it is costly
                 to perform such ad-hoc queries on large scale data
                 using traditional data management systems, e.g., DBMS,
                 because data loading and indexing are very expensive.
                 In this demo, we propose a novel visual data explorer
                 system, FlashView, which omits the loading process by
                 directly querying raw data. FlashView applies
                 approximate query processing technique to achieve
                 real-time query results. It builds both in-memory index
                 and disk index to facilitate the data scanning. It also
                 supports tracking and updating multiple queries
                 concurrently. Note that FlashView is not designed as a
                 replacement of full-fledged DBMS. Instead, it tries to
                 help the analysts quickly understand the
                 characteristics of data, so he/she can selectively load
                 data into the DBMS to do more sophisticated analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Subercaze:2017:UPT,
  author =       "Julien Subercaze and Christophe Gravier and Syed
                 Gillani and Abderrahmen Kammoun and Fr{\'e}d{\'e}rique
                 Laforest",
  title =        "{Upsortable}: programming top-$k$ queries over data
                 streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1873--1876",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137797",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Top-$k$ queries over data streams is a well studied
                 problem. There exists numerous systems allowing to
                 process continuous queries over sliding windows. At the
                 opposite, non-append only streams call for ad-hoc
                 solutions, e.g. tailor-made solutions implemented in a
                 mainstream programming language. In the meantime, the
                 Stream API and lambda expressions have been added in
                 Java 8, thus gaining powerful operations for data
                 stream processing. However, the Java Collections
                 Framework does not provide data structures to safely
                 and conveniently support sorted collections of evolving
                 data. In this paper, we demonstrate Upsortable, an
                 annotation-based approach that allows to use existing
                 sorted collections from the standard Java API for
                 dynamic data management. Our approach relies on a
                 combination of pre-compilation abstract syntax tree
                 modifications and runtime analysis of bytecode.
                 Upsortable offers the developer a safe and
                 time-efficient solution for developing top-$k$ queries
                 on data streams while keeping a full compatibility with
                 standard Java.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chamanara:2017:QSH,
  author =       "Javad Chamanara and Birgitta K{\"o}nig-Ries and H. V.
                 Jagadish",
  title =        "{QUIS}: in-situ heterogeneous data source querying",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1877--1880",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137798",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Existing data integration frameworks are poorly suited
                 for the special requirements of scientists. To answer a
                 specific research question, often, excerpts of data
                 from different sources need to be integrated. The
                 relevant parts and the set of underlying sources may
                 differ from query to query. The analyses also
                 oftentimes involve frequently changing data and
                 exploratory querying. Additionally, the data sources
                 not only store data in different formats, but also
                 provide inconsistent data access functionality. The
                 classic Extract-Transform-Load (ETL) approach seems too
                 complex and time-consuming and does not fit well with
                 interest and expertise of the scientists. With QUIS
                 (QUery In-Situ), we provide a solution for this
                 problem. QUIS is an open source heterogeneous in-situ
                 data querying system. It utilizes a federated query
                 virtualization approach that is built upon plugged-in
                 adapters. QUIS takes a user query and transforms
                 appropriate portions of it into the corresponding
                 computation model on individual data sources and
                 executes it. It complements the segments of the query
                 that the target data sources can not execute. Hence, it
                 guarantees full syntax and semantic support for its
                 language on all data sources. QUIS's in-situ querying
                 facility almost eliminates the time to prepare the data
                 while maintaining a competitive performance and steady
                 scalability. The present demonstration illustrates
                 interesting features of the system: virtual Schemas,
                 heterogeneous joins, and visual query results. We
                 provide a realistic data processing scenario to examine
                 the system's features. Users can interact with QUIS
                 using its desktop workbench, command line interface, or
                 from any R client including RStudio Server.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alawini:2017:ADC,
  author =       "Abdussalam Alawini and Susan B. Davidson and Wei Hu
                 and Yinjun Wu",
  title =        "Automating data citation in {CiteDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1881--1884",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137799",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "An increasing amount of information is being collected
                 in structured, evolving, curated databases, driving the
                 question of how information extracted from such
                 datasets via queries should be cited. While several
                 databases say how data should be cited for web-page
                 views of the database, they leave it to users to
                 manually construct the citations. Furthermore, they do
                 not say how data extracted by queries other than
                 web-page views --- general queries --- should be cited.
                 This demo shows how citations can be specified for a
                 small set of views of the database, and used to
                 automatically generate citations for general queries
                 against the database.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2017:CEB,
  author =       "Yixiang Fang and Reynold Cheng and Siqiang Luo and
                 Jiafeng Hu and Kai Huang",
  title =        "{C-explorer}: browsing communities in large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1885--1888",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137800",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Community retrieval (CR) algorithms, which enable the
                 extraction of subgraphs from large social networks
                 (e.g., Facebook and Twitter), have attracted tremendous
                 interest. Various CR solutions, such as k-core and
                 codicil, have been proposed to obtain graphs whose
                 vertices are closely related. In this paper, we propose
                 the C-Explorer system to assist users in extracting,
                 visualizing, and analyzing communities. C-Explorer
                 provides online and interactive CR facilities, allowing
                 a user to view her interesting graphs, indicate her
                 required vertex q, and display the communities to which
                 q belongs. A seminal feature of C-Explorer is that it
                 uses an attributed graph, whose vertices are associated
                 with labels and keywords, and looks for an attributed
                 community (or AC), whose vertices are structurally and
                 semantically related. Moreover, C-Explorer implements
                 several state-of-the-art CR algorithms, as well as
                 functions for analyzing their effectiveness. We plan to
                 make C-Explorer an open-source web-based platform, and
                 design API functions for software developers to test
                 their CR algorithms in our system.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2017:GPS,
  author =       "Wenfei Fan and Jingbo Xu and Yinghui Wu and Wenyuan Yu
                 and Jiaxin Jiang",
  title =        "{GRAPE}: parallelizing sequential graph computations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1889--1892",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137801",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate GRAPE, a parallel GRAPh query Engine.
                 GRAPE advocates a parallel model based on a
                 simultaneous fixed point computation in terms of
                 partial and incremental evaluation. It differs from
                 prior systems in its ability to parallelize existing
                 sequential graph algorithms as a whole, without the
                 need for recasting the entire algorithms into a new
                 model. One of its unique features is that under a
                 monotonic condition, GRAPE parallelization guarantees
                 to terminate with correct answers as long as the
                 sequential algorithms ``plugged in'' are correct. We
                 demonstrate its parallel computations, ease-of-use and
                  performance compared with the state-of-the-art graph
                 systems. We also demonstrate a use case of GRAPE in
                 social media marketing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khoshkbarforoushha:2017:FDA,
  author =       "Alireza Khoshkbarforoushha and Rajiv Ranjan and Qing
                 Wang and Carsten Friedrich",
  title =        "{Flower}: a data analytics flow elasticity manager",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1893--1896",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137802",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A data analytics flow typically operates on three
                 layers: ingestion, analytics, and storage, each of
                 which is provided by a data-intensive system. These
                 systems are often available as cloud managed services,
                 enabling the users to have pain-free deployment of data
                 analytics flow applications such as click-stream
                 analytics. Despite straightforward orchestration,
                 elasticity management of the flows is challenging. This
                 is due to: (a) heterogeneity of workloads and diversity
                 of cloud resources such as queue partitions, compute
                 servers and NoSQL throughputs capacity, (b) workload
                 dependencies between the layers, and (c) different
                 performance behaviours and resource consumption
                 patterns. In this demonstration, we present Flower, a
                 holistic elasticity management system that exploits
                 advanced optimization and control theory techniques to
                 manage elasticity of complex data analytics flows on
                 clouds. Flower analyzes statistics and data collected
                 from different data-intensive systems to provide the
                 user with a suite of rich functionalities, including:
                 workload dependency analysis, optimal resource share
                 analysis, dynamic resource provisioning, and
                 cross-platform monitoring. We will showcase various
                 features of Flower using a real-world data analytics
                 flow. We will allow the audience to explore Flower by
                 visually defining and configuring a data analytics flow
                 elasticity manager and get hands-on experience with
                 integrated data analytics flow management.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2017:SAD,
  author =       "Zhiyi Wang and Dongyan Zhou and Shimin Chen",
  title =        "{STEED}: an analytical database system for
                 tree-structured data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1897--1900",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137803",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tree-structured data formats, such as JSON and
                 Protocol Buffers, are capable of expressing
                 sophisticated data types, including nested, repeated,
                 and missing values. While such expressing power
                 contributes to their popularity in real-world
                 applications, it presents a significant challenge for
                 systems supporting tree-structured data. Existing
                 systems have focused on general-purpose solutions
                 either extending RDBMSs or designing native systems.
                 However, the general-purpose approach often results in
                 sophisticated data structures and algorithms, which may
                 not reflect and optimize for the actual structure
                 patterns in the real world. In this demonstration, we
                 showcase Steed, an analytical database System for
                 tree-structured data. We use the insights gained by
                 analyzing representative real-world tree structured
                 data as guidelines in the design of Steed. Steed learns
                 and extracts a schema tree for a data set and uses the
                 schema tree to reduce the storage space and improve the
                 efficiency of data field accesses. We observe that
                 sub-structures in real world data are often simple,
                 while the tree-structured data types can support very
                 sophisticated structures. We optimize the storage
                 structure, the column assembling algorithm, and the
                 in-memory layout for the simple sub-structures (a.k.a.
                 simple paths). Compared to representative
                 state-of-the-art systems (i.e. PostgreSQL/JSON,
                 MongoDB, and Hive+Parquet), Steed achieves orders of
                 magnitude better performance for data analysis
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xiao:2017:LLC,
  author =       "Yonghui Xiao and Li Xiong and Si Zhang and Yang Cao",
  title =        "{LocLok}: location cloaking with differential privacy
                 via hidden {Markov} model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1901--1904",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137804",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate LocLok, a LOCation-cLOaKing system to
                 protect the locations of a user with differential
                 privacy. LocLok has two features: (a) it protects
                 locations under temporal correlations described through
                 hidden Markov model; (b) it releases the optimal noisy
                 location with the planar isotropic mechanism (PIM), the
                 first mechanism that achieves the lower bound of
                 differential privacy. We show the detailed computation
                 of LocLok with the following components: (a) how to
                 generate the possible locations with Markov model, (b)
                 how to perturb the location with PIM, and (c) how to
                 make inference about the true location in Markov model.
                  An online system with real-world dataset will be
                 presented with the computation details.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2017:SAI,
  author =       "Xiangnan Ren and Olivier Cur{\'e} and Li Ke and Jeremy
                 Lhez and Badre Belabbess and Tendry Randriamalala and
                 Yufan Zheng and Gabriel Kepeklian",
  title =        "{Strider}: an adaptive, inference-enabled distributed
                 {RDF} stream processing engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1905--1908",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137805",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Real-time processing of data streams emanating from
                 sensors is becoming a common task in industrial
                 scenarios. An increasing number of processing jobs
                 executed over such platforms are requiring reasoning
                 mechanisms. The key implementation goal is thus to
                 efficiently handle massive incoming data streams and
                 support reasoning, data analytic services. Moreover, in
                 an on-going industrial project on anomaly detection in
                 large potable water networks, we are facing the effect
                 of dynamically changing data and work characteristics
                 in stream processing. The Strider system addresses
                 these research and implementation challenges by
                 considering scalability, fault-tolerance, high
                 throughput and acceptable latency properties. We will
                 demonstrate the benefits of Strider on an Internet of
                 Things-based real world and industrial setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:CAT,
  author =       "Yan Li and Ngai Meng Kou and Hao Wang and Leong Hou U.
                 and Zhiguo Gong",
  title =        "A confidence-aware top-$k$ query processing toolkit on
                 crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1909--1912",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137806",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Ranking techniques have been widely used in ubiquitous
                 applications like recommendation, information
                 retrieval, etc. For ranking computation hostile but
                 human friendly items, crowdsourcing is considered as an
                 emerging technique to process the ranking by human
                 power. However, there is a lack of an easy-to-use
                  toolkit for answering crowdsourced top-k query with
                  minimal effort. In this work, we demonstrate an
                  interactive programming toolkit that is a unified
                  solution for answering the crowd-sourced top-k
                  queries. The toolkit employs a new confidence-aware
                  crowdsourced top-k algorithm, SPR. The whole progress
                 of the algorithm is monitored and visualized to end
                 users in a timely manner. Besides the visualized result
                 and the statistics, the system also reports the
                 estimation of the monetary cost and the breakdown of
                 each phase. Based on the estimation, end users can
                 strike a balance between the budget and the quality
                 through the interface of this toolkit.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fionda:2017:EQK,
  author =       "Valeria Fionda and Giuseppe Pirr{\`o}",
  title =        "Explaining and querying knowledge graphs by
                 relatedness",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1913--1916",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137807",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate RECAP, a tool that explains relatedness
                 between entities in Knowledge Graphs (KGs) and
                 implements a query by relatedness paradigm that allows
                 to retrieve entities related to those in input. One of
                 the peculiarities of RECAP is that it does not require
                 any data preprocessing and can combine knowledge from
                 multiple KGs. The underlying algorithmic techniques are
                 reduced to the execution of SPARQL queries plus some
                 local refinement. This makes the tool readily available
                 on a large variety of KGs accessible via SPARQL
                 endpoints. To show the general applicability of the
                 tool, we will cover a set of use cases drawn from a
                 variety of knowledge domains (e.g., biology, movies,
                 co-authorship networks) and report on the concrete
                 usage of RECAP in the SENSE4US FP7 project. We will
                 underline the technical aspects of the system and give
                 details on its implementation. The target audience of
                 the demo includes both researchers and practitioners
                 and aims at reporting on the benefits of RECAP in
                 practical knowledge discovery applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kunjir:2017:TAM,
  author =       "Mayuresh Kunjir and Shivnath Babu",
  title =        "{Thoth} in action: memory management in modern data
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1917--1920",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137808",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Allocation and usage of memory in modern
                 data-processing platforms is based on an interplay of
                 algorithms at multiple levels: (i) at the
                 resource-management level across containers allocated
                 by resource managers like Mesos and Yarn, (ii) at the
                 container level among the OS and processes such as the
                 Java Virtual Machine (JVM), (iii) at the framework
                 level for caching, aggregation, data shuffles, and
                 application data structures, and (iv) at the JVM level
                 across various pools such as the Young and Old
                 Generation as well as the heap versus off-heap. We use
                 Thoth, a data-driven platform for multi-system cluster
                 management, to build a deep understanding of different
                 interplays in memory management options. Through
                 multiple memory management apps built in Thoth, we
                 demonstrate how Thoth can deal with multiple levels of
                 memory management as well as multi-tenant nature of
                 clusters.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schule:2017:MSS,
  author =       "Maximilian E. Sch{\"u}le and Pascal M. N. Schliski and
                 Thomas Hutzelmann and Tobias Rosenberger and Viktor
                 Leis and Dimitri Vorona and Alfons Kemper and Thomas
                 Neumann",
  title =        "{Monopedia}: staying single is good enough --- the
                 {HyPer} way for web scale applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1921--1924",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137809",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In order to handle the database load for web scale
                 applications, the conventional wisdom is that a cluster
                 of database servers and a caching layer are essential.
                 In this work, we argue that modern main memory database
                 systems are often fast enough to consolidate this
                 complex architecture into a single server (plus an
                 additional fail over system). To demonstrate this
                 claim, we design the Monopedia Benchmark, a benchmark
                 for web scale applications modeled after Wikipedia.
                 Using this benchmark, we show that it is indeed
                 possible to run the database workload of one of the
                 largest web sites in the world on a single database
                 server.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2017:DDM,
  author =       "Ji Sun and Zeyuan Shang and Guoliang Li and Dong Deng
                 and Zhifeng Bao",
  title =        "{Dima}: a distributed in-memory similarity-based query
                 processing system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1925--1928",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137810",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data analysts in industries spend more than 80\% of
                 time on data cleaning and integration in the whole
                 process of data analytics due to data errors and
                 inconsistencies. It calls for effective query
                 processing techniques to tolerate the errors and
                 inconsistencies. In this paper, we develop a
                 distributed in-memory similarity-based query processing
                 system called Dima. Dima supports two core
                 similarity-based query operations, i.e., similarity
                 search and similarity join. Dima extends the SQL
                 programming interface for users to easily invoke these
                 two operations in their data analysis jobs. To avoid
                 expensive data transformation in a distributed
                 environment, we design selectable signatures where two
                 records approximately match if they share common
                 signatures. More importantly, we can adaptively select
                 the signatures to balance the workload. Dima builds
                 signature-based global indexes and local indexes to
                 support efficient similarity search and join. Since
                 Spark is one of the widely adopted distributed
                 in-memory computing systems, we have seamlessly
                 integrated Dima into Spark and developed effective
                 query optimization techniques in Spark. To the best of
                 our knowledge, this is the first full-fledged
                 distributed in-memory system that can support
                 similarity-based query processing. We demonstrate our
                 system in several scenarios, including entity matching,
                 web table integration and query recommendation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chekol:2017:TTC,
  author =       "Melisachew W. Chekol and Giuseppe Pirr{\`o} and Joerg
                 Schoenfisch and Heiner Stuckenschmidt",
  title =        "{TeCoRe}: temporal conflict resolution in knowledge
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1929--1932",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137811",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The management of uncertainty is crucial when
                 harvesting structured content from unstructured and
                 noisy sources. Knowledge Graphs (kgs), maintaining both
                 numerical and non-numerical facts supported by an
                 underlying schema, are a prominent example. Knowledge
                 Graph management is challenging because: (i) most of
                 existing kgs focus on static data, thus impeding the
                 availability of timewise knowledge; (ii) facts in kgs
                 are usually accompanied by a confidence score, which
                 witnesses how likely it is for them to hold. We
                 demonstrate TeCoRe, a system for temporal inference and
                 conflict resolution in uncertain temporal knowledge
                 graphs (utkgs). At the heart of TeCoRe are two
                 state-of-the-art probabilistic reasoners that are able
                 to deal with temporal constraints efficiently. While
                 one is scalable, the other can cope with more
                 expressive constraints. The demonstration will focus on
                 enabling users and applications to find inconsistencies
                 in utkgs. TeCoRe provides an interface allowing to
                 select utkgs and editing constraints; shows the maximal
                 consistent subset of the utkg, and displays statistics
                 (e.g., number of noisy facts removed) about the
                 debugging process.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:MTD,
  author =       "Xupeng Li and Bin Cui and Yiru Chen and Wentao Wu and
                 Ce Zhang",
  title =        "{MLog}: towards declarative in-database machine
                 learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1933--1936",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137812",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate MLog, a high-level language that
                 integrates machine learning into data management
                 systems. Unlike existing machine learning frameworks
                 (e.g., TensorFlow, Theano, and Caffe), MLog is
                 declarative, in the sense that the system manages all
                 data movement, data persistency, and machine-learning
                 related optimizations (such as data batching)
                 automatically. Our interactive demonstration will show
                 audience how this is achieved based on the novel notion
                 of tensoral views (TViews), which are similar to
                 relational views but operate over tensors with linear
                 algebra. With MLog, users can succinctly specify not
                 only simple models such as SVM (in just two lines), but
                 also sophisticated deep learning models that are not
                 supported by existing in-database analytics systems
                 (e.g., MADlib, PAL, and SciDB), as a series of cascaded
                 TViews. Given the declarative nature of MLog, we
                 further demonstrate how query/program optimization
                 techniques can be leveraged to translate MLog programs
                 into native TensorFlow programs. The performance of the
                  automatically generated TensorFlow programs is
                 comparable to that of hand-optimized ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Demiralp:2017:FRV,
  author =       "{\c{C}}agatay Demiralp and Peter J. Haas and
                 Srinivasan Parthasarathy and Tejaswini Pedapati",
  title =        "{Foresight}: recommending visual insights",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1937--1940",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137813",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Current tools for exploratory data analysis (EDA)
                 require users to manually select data attributes,
                 statistical computations and visual encodings. This can
                 be daunting for large-scale, complex data. We introduce
                 Foresight, a system that helps the user rapidly
                 discover visual insights from large high-dimensional
                 datasets. Formally, an ``insight'' is a strong
                 manifestation of a statistical property of the data,
                 e.g., high correlation between two attributes, high
                 skewness or concentration about the mean of a single
                 attribute, a strong clustering of values, and so on.
                 For each insight type, Foresight initially presents
                 visualizations of the top k instances in the data,
                 based on an appropriate ranking metric. The user can
                 then look at ``nearby'' insights by issuing ``insight
                 queries'' containing constraints on insight strengths
                 and data attributes. Thus the user can directly explore
                 the space of insights, rather than the space of data
                 dimensions and visual encodings as in other visual
                 recommender systems. Foresight also provides ``global''
                 views of insight space to help orient the user and
                 ensure a thorough exploration process. Furthermore,
                 Foresight facilitates interactive exploration of large
                 datasets through fast, approximate sketching.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jacobs:2017:BDT,
  author =       "Steven Jacobs and Md Yusuf Sarwar Uddin and Michael
                 Carey and Vagelis Hristidis and Vassilis J. Tsotras and
                 N. Venkatasubramanian and Yao Wu and Syed Safir and
                 Purvi Kaul and Xikui Wang and Mohiuddin Abdul Qader and
                 Yawei Li",
  title =        "A {BAD} demonstration: towards {Big Active Data}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1941--1944",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137814",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nearly all of today's Big Data systems are passive in
                 nature. We demonstrate our Big Active Data (``BAD'')
                 system, a scalable system that continuously and
                 reliably captures Big Data and facilitates the timely
                 and automatic delivery of new information to a large
                 population of interested users as well as supporting
                 analyses of historical information. We built our BAD
                 project by extending an existing scalable, open-source
                 BDMS (AsterixDB [1]) in this active direction. In this
                 demonstration, we allow our audience to participate in
                 an emergency notification application built on top of
                 our BAD platform, and highlight its capabilities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hassan:2017:CFE,
  author =       "Naeemul Hassan and Gensheng Zhang and Fatma Arslan and
                 Josue Caraballo and Damian Jimenez and Siddhant Gawsane
                 and Shohedul Hasan and Minumol Joseph and Aaditya
                 Kulkarni and Anil Kumar Nayak and Vikas Sable and
                 Chengkai Li and Mark Tremayne",
  title =        "{ClaimBuster}: the first-ever end-to-end fact-checking
                 system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1945--1948",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137815",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Our society is struggling with an unprecedented amount
                 of falsehoods, hyperboles, and half-truths. Politicians
                 and organizations repeatedly make the same false
                 claims. Fake news floods the cyberspace and even
                 allegedly influenced the 2016 election. In fighting
                 false information, the number of active fact-checking
                 organizations has grown from 44 in 2014 to 114 in early
                 2017.$^1$ Fact-checkers vet claims by investigating
                 relevant data and documents and publish their verdicts.
                 For instance, PolitiFact.com, one of the earliest and
                 most popular fact-checking projects, gives factual
                 claims truthfulness ratings such as True, Mostly True,
                 Half true, Mostly False, False, and even ``Pants on
                 Fire''. In the U.S., the election year made
                 fact-checking a part of household terminology. For
                 example, during the first presidential debate on
                 September 26, 2016, NPR.org's live fact-checking
                 website drew 7.4 million page views and delivered its
                 biggest traffic day ever.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deep:2017:QDR,
  author =       "Shaleen Deep and Paraschos Koutris and Yash
                 Bidasaria",
  title =        "{QIRANA} demonstration: real time scalable query
                 pricing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1949--1952",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137816",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The last decade has seen a deluge in data collection
                 and dissemination across a broad range of areas. This
                 phenomena has led to creation of online data markets
                 where entities engage in sale and purchase of data. In
                 this scenario, the key challenge for the data market
                 platform is to ensure that it allows real time,
                 scalable, arbitrage-free pricing of user queries. At
                 the same time, the platform needs to flexible enough
                 for sellers in order to customize the setup of the data
                 to be sold. In this paper, we describe the
                 demonstration of Qirana, a lightweight framework that
                 implements query-based pricing at scale. The framework
                 acts as a layer between the end users (buyers and
                 sellers) and the database. Qirana's demonstration
                 features that we highlight are: (i) allows sellers to
                 choose from a variety of pricing functions based on
                 their requirements and incorporates price points as a
                 guide for query pricing; (ii) helps the seller set
                 parameters by mocking workloads; (iii) buyers engage
                 with the platform by directly asking queries and track
                 their budget per dataset. We demonstrate the tunable
                 parameters of our framework over a real-world dataset,
                 illustrating the promise of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khan:2017:DDT,
  author =       "Meraj Khan and Larry Xu and Arnab Nandi and Joseph M.
                 Hellerstein",
  title =        "{DataTweener}: a demonstration of a tweening engine
                 for incremental visualization of data transforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1953--1956",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137817",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the development and advancement of new data
                 interaction modalities, data exploration and analysis
                 has become a highly interactive process situating the
                 user in a session of successive queries. With rapidly
                 changing results, it becomes difficult for the end user
                 to fully comprehend transformations, especially the
                 transforms corresponding to complex queries. We
                 introduce ``data tweening'' as an informative way of
                 visualizing structural data transforms, presenting the
                 users with a series of incremental visual
                 representations of a resultset transformation. We
                 present transformations as ordered sequences of basic
                 structural transforms and visual cues. The sequences
                 are generated using an automated framework which
                 utilizes differences between the consecutive resultsets
                 and queries in a query session. We evaluate the
                 effectiveness of tweening as a visualization method
                 through a user study.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Salimi:2017:ZCI,
  author =       "Babak Salimi and Corey Cole and Dan R. K. Ports and
                 Dan Suciu",
  title =        "{ZaliQL}: causal inference from observational data at
                 scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1957--1960",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137818",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Causal inference from observational data is a subject
                 of active research and development in statistics and
                 computer science. Many statistical software packages
                 have been developed for this purpose. However, these
                 toolkits do not scale to large datasets. We propose and
                 demonstrate ZaliQL: a SQL-based framework for drawing
                 causal inference from observational data. ZaliQL
                 supports the state-of-the-art methods for causal
                 inference and runs at scale within PostgreSQL database
                 system. In addition, we built a visual interface to
                 wrap around ZaliQL. In our demonstration, we will use
                 this GUI to show a live investigation of the causal
                 effect of different weather conditions on flight
                 delays.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Alarabi:2017:DSH,
  author =       "Louai Alarabi and Mohamed F. Mokbel",
  title =        "A demonstration of {ST-Hadoop}: a {MapReduce}
                 framework for big spatio-temporal data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1961--1964",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137819",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This demo presents ST-Hadoop; the first full-fledged
                 open-source MapReduce framework with a native support
                 for spatio-temporal data. ST-Hadoop injects
                 spatio-temporal awareness in the Hadoop base code,
                 which results in achieving order(s) of magnitude better
                 performance than Hadoop and SpatialHadoop when dealing
                 with spatio-temporal data and queries. The key idea
                 behind ST-Hadoop is its ability in indexing
                 spatio-temporal data within Hadoop Distributed File
                 System (HDFS). A real system prototype of ST-Hadoop,
                 running on a local cluster of 24 machines, is
                 demonstrated with two big-spatio-temporal datasets of
                 Twitter and NYC Taxi data, each of around one billion
                 records.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bharadwaj:2017:CIL,
  author =       "S. Bharadwaj and L. Chiticariu and M. Danilevsky and
                 S. Dhingra and S. Divekar and A. Carreno-Fuentes and H.
                 Gupta and N. Gupta and S.-D. Han and M. Hern{\'a}ndez
                 and H. Ho and P. Jain and S. Joshi and H. Karanam and
                 S. Krishnan and R. Krishnamurthy and Y. Li and S.
                 Manivannan and A. Mittal and F. {\"O}zcan and A. Quamar
                 and P. Raman and D. Saha and K. Sankaranarayanan and J.
                 Sen and P. Sen and S. Vaithyanathan and M. Vasa and H.
                 Wang and H. Zhu",
  title =        "Creation and interaction with large-scale
                 domain-specific knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1965--1968",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137820",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ability to create and interact with large-scale
                 domain-specific knowledge bases from
                 unstructured/semi-structured data is the foundation for
                 many industry-focused cognitive systems. We will
                 demonstrate the Content Services system that provides
                 cloud services for creating and querying high-quality
                 domain-specific knowledge bases by analyzing and
                 integrating multiple (un/semi)structured content
                 sources. We will showcase an instantiation of the
                 system for a financial domain. We will also demonstrate
                 both cross-lingual natural language queries and
                 programmatic API calls for interacting with this
                 knowledge base.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jonathan:2017:DSC,
  author =       "Christopher Jonathan and Mohamed F. Mokbel",
  title =        "A demonstration of {Stella}: a crowdsourcing-based
                 geotagging framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1969--1972",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137821",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper demonstrates Stella; an efficient
                 crowdsourcing-based geotagging framework for any types
                 of objects. In this demonstration, we showcase the
                 effectiveness of Stella in geotagging images via two
                 different scenarios: (1) we provide a graphical
                 interface to show the process of a geotagging process
                 that have been done by using Amazon Mechanical Turk,
                 (2) we seek help from the conference attendees to
                 propose an image to be geotagged or to help us geotag
                 an image by using our application during the
                 demonstration period. At the end of the demonstration
                 period, we will show the geotagging result.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Moll:2017:EBV,
  author =       "Oscar Moll and Aaron Zalewski and Sudeep Pillai and
                 Sam Madden and Michael Stonebraker and Vijay
                 Gadepally",
  title =        "Exploring big volume sensor data with {Vroom}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1973--1976",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137822",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "State of the art sensors within a single autonomous
                 vehicle (AV) can produce video and LIDAR data at rates
                 greater than 30 GB/hour. Unsurprisingly, even small AV
                 research teams can accumulate tens of terabytes of
                 sensor data from multiple trips and multiple vehicles.
                 AV practitioners would like to extract information
                 about specific locations or specific situations for
                 further study, but are often unable to. Queries over AV
                 sensor data are different from generic analytics or
                 spatial queries because they demand reasoning about
                 fields of view as well as heavy computation to extract
                 features from scenes. In this article and demo we
                 present Vroom, a system for ad-hoc queries over AV
                 sensor databases. Vroom combines domain specific
                 properties of AV datasets with selective indexing and
                 multi-query optimization to address challenges posed by
                 AV sensor data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mottin:2017:NTE,
  author =       "Davide Mottin and Matteo Lissandrini and Yannis
                 Velegrakis and Themis Palpanas",
  title =        "New trends on exploratory methods for data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1977--1980",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137824",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data usually comes in a plethora of formats and
                 dimensions, rendering the exploration and information
                 extraction processes cumbersome. Thus, being able to
                 cast exploratory queries in the data with the intent of
                 having an immediate glimpse on some of the data
                 properties is becoming crucial. An exploratory query
                 should be simple enough to avoid complicate declarative
                 languages (such as SQL) and mechanisms, and at the same
                 time retain the flexibility and expressiveness of such
                 languages. Recently, we have witnessed a rediscovery of
                 the so called example-based methods, in which the user,
                 or the analyst circumvent query languages by using
                 examples as input. An example is a representative of
                 the intended results, or in other words, an item from
                 the result set. Example-based methods exploit inherent
                 characteristics of the data to infer the results that
                 the user has in mind, but may not able to (easily)
                 express. They can be useful both in cases where a user
                 is looking for information in an unfamiliar dataset, or
                 simply when she is exploring the data without knowing
                 what to find in there. In this tutorial, we present an
                 excursus over the main methods for exploratory
                 analysis, with a particular focus on example-based
                 methods. We show how different data types require
                 different techniques, and present algorithms that are
                 specifically designed for relational, textual, and
                 graph data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Khan:2017:SSD,
  author =       "Arijit Khan and Sourav S. Bhowmick and Francesco
                 Bonchi",
  title =        "Summarizing static and dynamic big graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1981--1984",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137825",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale, highly-interconnected networks pervade
                 our society and the natural world around us, including
                 the World Wide Web, social networks, knowledge graphs,
                 genome and scientific databases, medical and government
                 records. The massive scale of graph data often
                 surpasses the available computation and storage
                 resources. Besides, users get overwhelmed by the
                 daunting task of understanding and using such graphs
                 due to their sheer volume and complexity. Hence, there
                 is a critical need to summarize large graphs into
                 concise forms that can be more easily visualized,
                 processed, and managed. Graph summarization has indeed
                 attracted a lot of interests from various research
                 communities, such as sociology, physics, chemistry,
                 bioinformatics, and computer science. Different ways of
                 summarizing graphs have been invented that are often
                 complementary to each other. In this tutorial, we
                 discuss algorithmic advances on graph summarization in
                 the context of both classical (e.g., static graphs) and
                 emerging (e.g., dynamic and stream graphs)
                 applications. We emphasize the current challenges and
                 highlight some future research directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mouratidis:2017:GAT,
  author =       "Kyriakos Mouratidis",
  title =        "Geometric approaches for top-$k$ queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1985--1987",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137826",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Top-$k$ processing is a well-studied problem with
                 numerous applications that is becoming increasingly
                 relevant with the growing availability of
                 recommendation systems and decision making software.
                 The objective of this tutorial is twofold. First, we
                 will delve into the geometric aspects of top-$k$
                 processing. Second, we will cover complementary
                 features to top-$k$ queries, with strong practical
                 relevance and important applications, that have a
                 computational geometric nature. The tutorial will close
                 with insights in the effect of dimensionality on the
                 meaningfulness of top-$k$ queries, and interesting
                 similarities to nearest neighbor search.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tong:2017:SCC,
  author =       "Yongxin Tong and Lei Chen and Cyrus Shahabi",
  title =        "Spatial crowdsourcing: challenges, techniques, and
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1988--1991",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137827",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Crowdsourcing is a new computing paradigm where humans
                 are actively enrolled to participate in the procedure
                 of computing, especially for tasks that are
                 intrinsically easier for humans than for computers. The
                 popularity of mobile computing and sharing economy has
                 extended conventional web-based crowdsourcing to
                 spatial crowdsourcing (SC), where spatial data such as
                 location, mobility and the associated contextual
                 information, plays a central role. In fact, spatial
                 crowdsourcing has stimulated a series of recent
                 industrial successes including Citizen Sensing (Waze),
                 P2P ride-sharing (Uber) and Real-time Online-To-Offline
                 (O2O) services (Instacart and Postmates). In this
                 tutorial, we review the paradigm shift from web-based
                 crowdsourcing to spatial crowdsourcing. We dive deep
                 into the challenges and techniques brought by the
                 unique spatio-temporal characteristics of spatial
                 crowdsourcing. Particularly, we survey new designs in
                 task assignment, quality control, incentive mechanism
                 design and privacy protection on spatial crowdsourcing
                 platforms, as well as the new trend to incorporate
                 crowdsourcing to enhance existing spatial data
                 processing techniques. We also discuss case studies of
                 representative spatial crowdsourcing systems and raise
                 open questions and current challenges for the audience
                 to easily comprehend the tutorial and to advance this
                 important research area.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eldawy:2017:EBS,
  author =       "Ahmed Eldawy and Mohamed F. Mokbel",
  title =        "The era of big spatial data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1992--1995",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137828",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this tutorial, we present the recent work in the
                 database community for handling Big Spatial Data. This
                 topic became very hot due to the recent explosion in
                 the amount of spatial data generated by smart phones,
                 satellites and medical devices, among others. This
                 tutorial goes beyond the use of existing systems as-is
                 (e.g., Hadoop, Spark or Impala), and digs deep into the
                 core components of big systems (e.g., indexing and
                 query processing) to describe how they are designed to
                 handle big spatial data. During this 90-minute
                 tutorial, we review the state-of-the-art work in the
                 area of Big Spatial Data while classifying the existing
                 research efforts according to the implementation
                 approach, underlying architecture, and system
                 components. In addition, we provide case studies of
                 full-fledged systems and applications that handle Big
                 Spatial Data which allows the audience to better
                 comprehend the whole tutorial.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Giatrakos:2017:CER,
  author =       "Nikos Giatrakos and Alexander Artikis and Antonios
                 Deligiannakis and Minos Garofalakis",
  title =        "Complex event recognition in the big data era",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "1996--1999",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137829",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The concept of event processing is established as a
                 generic computational paradigm in various application
                 fields, ranging from data processing in Web
                 environments, over maritime and transport, to finance
                 and medicine. Events report on state changes of a
                 system and its environment. Complex Event Recognition
                 (CER) in turn, refers to the identification of
                 complex/composite events of interest, which are
                 collections of simple events that satisfy some pattern,
                 thereby providing the opportunity for reactive and
                 proactive measures. Examples include the recognition of
                 attacks in computer network nodes, human activities on
                 video content, emerging stories and trends on the
                 Social Web, traffic and transport incidents in smart
                 cities, fraud in electronic marketplaces, cardiac
                 arrhythmias, and epidemic spread. In each scenario, CER
                 allows to make sense of Big event Data streams and
                 react accordingly. The goal of this tutorial is to
                 provide a step-by-step guide for realizing CER in the
                 Big Data era. To do so, it elaborates on major
                 challenges and describes algorithmic toolkits for
                 optimized manipulation of event streams characterized
                 by high volume, velocity and/or lack of veracity,
                 placing emphasis on distributed CER over potentially
                 heterogeneous (data variety) event sources. Finally, we
                 highlight future research directions in the field.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mohan:2017:TBD,
  author =       "C. Mohan",
  title =        "Tutorial: blockchains and databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "2000--2001",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137830",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the last few years, blockchain (also known as
                 distributed ledger), the underlying technology of the
                 permissionless or public Bitcoin network, has become
                 very popular for use in private or permissioned
                 environments. Computer companies like IBM and
                 Microsoft, and many key players in different vertical
                 industry segments have recognized the utility of
                 blockchains for securely managing assets
                 (physical/digital) other than cryptocurrencies. IBM did
                 some pioneering work by architecting and implementing a
                 private blockchain system, and then open sourcing it.
                 That system, which has since then been named Fabric, is
                 being enhanced via the Hyperledger Consortium set up
                 under the auspices of the Linux Foundation. Other
                 efforts in the industry include Enterprise Ethereum, R3
                 Corda and BigchainDB.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zakhary:2017:CWS,
  author =       "Victor Zakhary and Divyakant Agrawal and Amr {El
                 Abbadi}",
  title =        "Caching at the web scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "2002--2005",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137831",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today's web applications and social networks are
                 serving billions of users around the globe. These users
                 generate billions of key lookups and millions of data
                 object updates per second. A single user's social
                 network page load requires hundreds of key lookups.
                 This scale creates many design challenges for the
                 underlying storage systems. First, these systems have
                 to serve user requests with low latency. Any increase
                 in the request latency leads to a decrease in user
                 interest. Second, storage systems have to be highly
                 available. Failures should be handled seamlessly
                 without affecting user requests. Third, users consume
                 an order of magnitude more data than they produce.
                 Therefore, storage systems have to be optimized for
                 read-intensive workloads. To address these challenges,
                 distributed in-memory caching services have been widely
                 deployed on top of persistent storage. In this
                 tutorial, we survey the recent developments in
                 distributed caching services. We present the
                 algorithmic and architectural efforts behind these
                 systems focusing on the challenges in addition to open
                 research questions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:HLD,
  author =       "Guoliang Li",
  title =        "Human-in-the-loop data integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "2006--2017",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137833",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data integration aims to integrate data in different
                 sources and provide users with a unified view. However,
                 data integration cannot be completely addressed by
                 purely automated methods. We propose a hybrid
                 human-machine data integration framework that harnesses
                 human ability to address this problem, and apply it
                 initially to the problem of entity matching. The
                 framework first uses rule-based algorithms to identify
                 possible matching pairs and then utilizes the crowd to
                 refine these candidate pairs in order to compute actual
                 matching pairs. In the first step, we propose
                 similarity-based rules and knowledge-based rules to
                 obtain some candidate matching pairs, and develop
                 effective algorithms to learn these rules based on some
                 given positive and negative examples. We build a
                 distributed in-memory system DIMA to efficiently apply
                 these rules. In the second step, we propose a
                 selection-inference-refine framework that uses the
                 crowd to verify the candidate pairs. We first select
                 some ``beneficial'' tasks to ask the crowd and then use
                 transitivity and partial order to infer the answers of
                 unasked tasks based on the crowdsourcing results of the
                 asked tasks. Next we refine the inferred answers with
                 high uncertainty due to the disagreement from the
                 crowd. We develop a crowd-powered database system CDB
                 and deploy it on real crowdsourcing platforms. CDB
                 allows users to utilize a SQL-like language for
                 processing crowd-based queries. Lastly, we provide
                 emerging challenges in human-in-the-loop data
                 integration.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lehner:2017:DCU,
  author =       "Wolfgang Lehner",
  title =        "The data center under your desk: how disruptive is
                 modern hardware for {DB} system design?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "2018--2019",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137834",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While we are already used to seeing more than 1,000 cores
                 within a single machine, the next processing platforms
                 for database engines will be heterogeneous with
                 built-in GPU-style processors as well as specialized
                 FPGAs or chips with domain-specific instruction sets.
                 Moreover, the traditional volatile as well as the
                 upcoming non-volatile RAM with capacities in the 100s
                 of TBytes per machine will provide great opportunities
                 for storage engines but also call for radical changes
                 on the architecture of such systems. Finally, the
                 emergence of economically affordable,
                 high-speed/low-latency interconnects as a basis for
                 rack-scale computing is questioning long-standing
                 folklore algorithmic assumptions but will certainly
                 play an important role in the big picture of building
                 modern data management platforms. In this talk, we will
                 try to classify and review existing approaches from a
                 performance, robustness, as well as energy efficiency
                 perspective and pinpoint interesting starting points
                 for further research activities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Milo:2017:SMM,
  author =       "Tova Milo",
  title =        "7 secrets that my mother didn't tell me",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "2020--2020",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137835",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "What does it take to be a good researcher? And, is it
                  different when you are a woman? These are questions
                 that many of us are wondering about throughout our
                 career. Being honored with a VLDB Women in Database
                 Research Award, I would like to share with you in this
                 talk some of the secrets to successful research that I
                 have learned over the years. These secrets highlight
                 some of the fundamental research directions that I have
                 taken. No less importantly, they explain how I
                 successfully got to work on them, both personally and
                 professionally.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lv:2017:IPL,
  author =       "Qin Lv and William Josephson and Zhe Wang and Moses
                 Charikar and Kai Li",
  title =        "Intelligent probing for locality sensitive hashing:
                 multi-probe {LSH} and beyond",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "12",
  pages =        "2021--2024",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3137765.3137836",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:19 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The past decade has been marked by the (continued)
                 explosion of diverse data content and the fast
                 development of intelligent data analytics techniques.
                 One problem we identified in the mid-2000s was
                 similarity search of feature-rich data. The challenge
                 here was achieving both high accuracy and high
                 efficiency in high-dimensional spaces. Locality
                 sensitive hashing (LSH), which uses certain random
                 space partitions and hash table lookups to find
                 approximate nearest neighbors, was a promising approach
                 with theoretical guarantees. But LSH alone was
                 insufficient since a large number of hash tables were
                 required to achieve good search quality. Building on an
                 idea of Panigrahy, our multi-probe LSH method
                 introduced the idea of intelligent probing. Given a
                 query object, we strategically probe its neighboring
                 hash buckets (in a query-dependent fashion) by
                 calculating the statistical probabilities of similar
                 objects falling into each bucket. Such intelligent
                 probing can significantly reduce the number of hash
                 tables while achieving high quality. In this paper, we
                 revisit the problem motivation, the challenges, the key
                 design considerations of multi-probe LSH, as well as
                 discuss recent developments in this space and some
                 questions for further research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qin:2017:SRB,
  author =       "Dai Qin and Angela Demke Brown and Ashvin Goel",
  title =        "Scalable replay-based replication for fast databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "13",
  pages =        "2025--2036",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:20 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Primary-backup replication is commonly used for
                 providing fault tolerance in databases. It is performed
                 by replaying the database recovery log on a backup
                 server. Such a scheme raises several challenges for
                 modern, high-throughput multi-core databases. It is
                 hard to replay the recovery log concurrently, and so
                 the backup can become the bottleneck. Moreover, with
                 the high transaction rates on the primary, the log
                 transfer can cause network bottlenecks. Both these
                 bottlenecks can significantly slow the primary
                 database. In this paper, we propose using record-replay
                 for replicating fast databases. Our design enables
                 replay to be performed scalably and concurrently, so
                 that the backup performance scales with the primary
                 performance. At the same time, our approach requires
                 only 15--20\% of the network bandwidth required by
                 traditional logging, reducing network infrastructure
                 costs significantly.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2017:SSE,
  author =       "Kai Ren and Qing Zheng and Joy Arulraj and Garth
                 Gibson",
  title =        "{SlimDB}: a space-efficient key--value storage engine
                 for semi-sorted data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "13",
  pages =        "2037--2048",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:20 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern key--value stores often use write-optimized
                 indexes and compact in-memory indexes to speed up read
                 and write performance. One popular write-optimized
                 index is the Log-structured merge-tree (LSM-tree) which
                 provides indexed access to write-intensive data. It has
                 been increasingly used as a storage backbone for many
                 services, including file system metadata management,
                 graph processing engines, and machine learning feature
                 storage engines. Existing LSM-tree implementations
                 often exhibit high write amplifications caused by
                 compaction, and lack optimizations to maximize read
                 performance on solid-state disks. The goal of this
                 paper is to explore techniques that leverage common
                 workload characteristics shared by many systems using
                 key--value stores to reduce the read/write
                 amplification overhead typically associated with
                 general-purpose LSM-tree implementations. Our
                 experiments show that by applying these design
                 techniques, our new implementation of a key--value
                 store, SlimDB, can be two to three times faster, use
                 less memory to cache metadata indices, and show lower
                 tail latency in read operations compared to popular
                 LSM-tree implementations such as LevelDB and RocksDB.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abdelaziz:2017:SEC,
  author =       "Ibrahim Abdelaziz and Razen Harbi and Zuhair Khayyat
                 and Panos Kalnis",
  title =        "A survey and experimental comparison of distributed
                 {SPARQL} engines for very large {RDF} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "13",
  pages =        "2049--2060",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:20 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed SPARQL engines promise to support very
                 large RDF datasets by utilizing shared-nothing computer
                 clusters. Some are based on distributed frameworks such
                 as MapReduce; others implement proprietary distributed
                 processing; and some rely on expensive preprocessing
                 for data partitioning. These systems exhibit a variety
                 of trade-offs that are not well-understood, due to the
                 lack of any comprehensive quantitative and qualitative
                 evaluation. In this paper, we present a survey of 22
                 state-of-the-art systems that cover the entire spectrum
                 of distributed RDF data processing and categorize them
                 by several characteristics. Then, we select 12
                 representative systems and perform extensive
                 experimental evaluation with respect to preprocessing
                 cost, query performance, scalability and workload
                 adaptability, using a variety of synthetic and real
                 large datasets with up to 4.3 billion triples. Our
                 results provide valuable insights for practitioners to
                 understand the trade-offs for their usage scenarios.
                 Finally, we publish online our evaluation framework,
                 including all datasets and workloads, for researchers
                 to compare their novel systems against the existing
                 ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kunft:2017:BEM,
  author =       "Andreas Kunft and Asterios Katsifodimos and Sebastian
                 Schelter and Tilmann Rabl and Volker Markl",
  title =        "{Blockjoin}: efficient matrix partitioning through
                 joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "13",
  pages =        "2061--2072",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:20 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Linear algebra operations are at the core of many
                 Machine Learning (ML) programs. At the same time, a
                 considerable amount of the effort for solving data
                 analytics problems is spent in data preparation. As a
                 result, end-to-end ML pipelines often consist of (i)
                  relational operators used for joining the input data,
                  (ii) user defined functions used for feature extraction
                 and vectorization, and (iii) linear algebra operators
                 used for model training and cross-validation. Often,
                 these pipelines need to scale out to large datasets. In
                 this case, these pipelines are usually implemented on
                 top of dataflow engines like Hadoop, Spark, or Flink.
                 These dataflow engines implement relational operators
                 on row-partitioned datasets. However, efficient linear
                 algebra operators use block-partitioned matrices. As a
                 result, pipelines combining both kinds of operators
                 require rather expensive changes to the physical
                 representation, in particular re-partitioning steps. In
                 this paper, we investigate the potential of reducing
                 shuffling costs by fusing relational and linear algebra
                 operations into specialized physical operators. We
                 present BlockJoin, a distributed join algorithm which
                 directly produces block-partitioned results. To
                 minimize shuffling costs, BlockJoin applies database
                 techniques known from columnar processing, such as
                 index-joins and late materialization, in the context of
                 parallel dataflow engines. Our experimental evaluation
                 shows speedups up to 6$ \times $ and the skew
                 resistance of BlockJoin compared to state-of-the-art
                 pipelines implemented in Spark.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Choi:2017:EMR,
  author =       "Dong-Wan Choi and Jian Pei and Thomas Heinis",
  title =        "Efficient mining of regional movement patterns in
                 semantic trajectories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "13",
  pages =        "2073--2084",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:20 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Semantic trajectory pattern mining is becoming more
                 and more important with the rapidly growing volumes of
                 semantically rich trajectory data. Extracting
                 sequential patterns in semantic trajectories plays a
                 key role in understanding semantic behaviour of human
                 movement, which can widely be used in many applications
                 such as location-based advertising, road capacity
                 optimisation, and urban planning. However, most of
                 existing works on semantic trajectory pattern mining
                 focus on the entire spatial area, leading to missing
                 some locally significant patterns within a region.
                 Based on this motivation, this paper studies a regional
                 semantic trajectory pattern mining problem, aiming at
                 identifying all the regional sequential patterns in
                 semantic trajectories. Specifically, we propose a new
                 density scheme to quantify the frequency of a
                 particular pattern in space, and thereby formulate a
                 new mining problem of finding all the regions in which
                 such a pattern densely occurs. For the proposed
                 problem, we develop an efficient mining algorithm,
                 called RegMiner (Regional Semantic Trajectory Pattern
                 Miner), which effectively reveals movement patterns
                 that are locally frequent in such a region but not
                 necessarily dominant in the entire space. Our empirical
                 study using real trajectory data shows that RegMiner
                 finds many interesting local patterns that are hard to
                 find by a state-of-the-art global pattern mining
                 scheme, and it also runs several orders of magnitude
                 faster than the global pattern mining algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kiefer:2017:EJS,
  author =       "Martin Kiefer and Max Heimel and Sebastian Bre{\ss}
                 and Volker Markl",
  title =        "Estimating join selectivities using
                 bandwidth-optimized kernel density models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "10",
  number =       "13",
  pages =        "2085--2096",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:20 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Accurately predicting the cardinality of intermediate
                 plan operations is an essential part of any modern
                 relational query optimizer. The accuracy of said
                 estimates has a strong and direct impact on the quality
                 of the generated plans, and incorrect estimates can
                 have a negative impact on query performance. One of the
                 biggest challenges in this field is to predict the
                 result size of join operations. Kernel Density
                 Estimation (KDE) is a statistical method to estimate
                 multivariate probability distributions from a data
                 sample. Previously, we introduced a modern, self-tuning
                 selectivity estimator for range scans based on KDE that
                 out-performs state-of-the-art multidimensional
                 histograms and is efficient to evaluate on graphics
                 cards. In this paper, we extend these
                 bandwidth-optimized KDE models to estimate the result
                 size of single and multiple joins. In particular, we
                 propose two approaches: (1) Building a KDE model from a
                 sample drawn from the join result. (2) Efficiently
                 combining the information from base table KDE models.
                 We evaluated our KDE-based join estimators on a variety
                 of synthetic and real-world datasets, demonstrating
                  that they are superior to state-of-the-art join
                 estimators based on sketching or sampling.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Menon:2017:ROF,
  author =       "Prashanth Menon and Todd C. Mowry and Andrew Pavlo",
  title =        "Relaxed operator fusion for in-memory databases:
                 making compilation, vectorization, and prefetching work
                 together at last",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "1--13",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In-memory database management systems (DBMSs) are a
                 key component of modern on-line analytic processing
                 (OLAP) applications, since they provide low-latency
                 access to large volumes of data. Because disk accesses
                  are no longer the principal bottleneck in such systems,
                 the focus in designing query execution engines has
                 shifted to optimizing CPU performance. Recent systems
                 have revived an older technique of using just-in-time
                 (JIT) compilation to execute queries as native code
                 instead of interpreting a plan. The state-of-the-art in
                 query compilation is to fuse operators together in a
                 query plan to minimize materialization overhead by
                 passing tuples efficiently between operators. Our
                 empirical analysis shows, however, that more tactful
                 materialization yields better performance. We present a
                 query processing model called ``relaxed operator
                 fusion'' that allows the DBMS to introduce staging
                 points in the query plan where intermediate results are
                 temporarily materialized. This allows the DBMS to take
                 advantage of inter-tuple parallelism inherent in the
                 plan using a combination of prefetching and SIMD
                 vectorization to support faster query execution on data
                 sets that exceed the size of CPU-level caches. Our
                 evaluation shows that our approach reduces the
                 execution time of OLAP queries by up to 2.2$ \times $
                 and achieves up to 1.8$ \times $ better performance
                 compared to other in-memory DBMSs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2017:PSS,
  author =       "Yu Liu and Bolong Zheng and Xiaodong He and Zhewei Wei
                 and Xiaokui Xiao and Kai Zheng and Jiaheng Lu",
  title =        "{Probesim}: scalable single-source and top-$k$
                 {SimRank} computations on dynamic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "14--26",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Single-source and top-$k$ SimRank queries are two
                 important types of similarity search in graphs with
                 numerous applications in web mining, social network
                 analysis, spam detection, etc. A plethora of techniques
                 have been proposed for these two types of queries, but
                 very few can efficiently support similarity search over
                 large dynamic graphs, due to either significant
                 preprocessing time or large space overheads. This paper
                 presents ProbeSim, an index-free algorithm for
                  single-source and top-$k$ SimRank queries that provides
                 a non-trivial theoretical guarantee in the absolute
                 error of query results. ProbeSim estimates SimRank
                 similarities without precomputing any indexing
                 structures, and thus can naturally support real-time
                 SimRank queries on dynamic graphs. Besides the
                 theoretical guarantee, ProbeSim also offers satisfying
                 practical efficiency and effectiveness due to
                 non-trivial optimizations. We conduct extensive
                 experiments on a number of benchmark datasets, which
                 demonstrate that our solutions outperform the existing
                 methods in terms of efficiency and effectiveness.
                 Notably, our experiments include the first empirical
                 study that evaluates the effectiveness of SimRank
                 algorithms on graphs with billion edges, using the idea
                 of pooling.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guagliardo:2017:FSS,
  author =       "Paolo Guagliardo and Leonid Libkin",
  title =        "A formal semantics of {SQL} queries, its validation,
                 and applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "27--39",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While formal semantics of theoretical languages
                 underlying SQL have been provided in the past, they all
                 made simplifying assumptions ranging from changes in
                 the syntax to omitting bag semantics and nulls. This
                 situation is reminiscent of what happens in the field
                 of programming languages, where semantics of formal
                 calculi underlying the main features of languages are
                 abundant, but formal semantics of real languages that
                 people use are few and far between. We consider the
                 basic class of SQL queries --- essentially
                 SELECT-FROM-WHERE queries with subqueries, set/bag
                 operations, and nulls --- and define a formal semantics
                 for it, without any departures from the real language.
                 This fragment already requires decisions related to the
                 data model and handling variable names that are
                 normally disregarded by simplified semantics. To
                 justify our choice of the semantics, we validate it
                 experimentally on a large number of randomly generated
                 queries and databases. We give two applications of the
                 semantics. One is the first formal proof of the
                 equivalence of basic SQL and relational algebra that
                 extends to bag semantics and nulls. The other
                 application looks at the three-valued logic employed by
                 SQL, which is universally assumed to be necessary to
                 handle nulls. We prove however that this is not so, as
                 three-valued logic does not add expressive power: every
                 SQL query in our fragment can be evaluated under the
                 usual two-valued Boolean semantics of conditions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kim:2017:EHS,
  author =       "Jinhyun Kim and Jun-Ki Min and Kyuseok Shim",
  title =        "Efficient {Haar$^+$} synopsis construction for the
                 maximum absolute error measure",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "40--52",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Several wavelet synopsis construction algorithms were
                 previously proposed based on dynamic programming for
                 unrestricted Haar wavelet synopses as well as Haar$^+$
                 synopses. However, they find an optimal synopsis for
                 every incoming value in each node of a coefficient
                 tree, even if different incoming values share an
                 identical optimal synopsis. To alleviate the
                 limitation, we present novel algorithms, which keep
                 only a minimal set of the distinct optimal synopses in
                 each node of the tree, for the error-bounded synopsis
                 problem. Furthermore, we propose the methods to
                 restrict coefficient values to be considered to compute
                 the optimal synopses in each node. In addition, by
                 partitioning all optimal synopses in each node into a
                 set of groups, such that every group can be represented
                 by a compact representation, we significantly improve
                 the performance of the proposed algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tao:2017:ASJ,
  author =       "Wenbo Tao and Dong Deng and Michael Stonebraker",
  title =        "Approximate string joins with abbreviations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "53--65",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "String joins have wide applications in data
                 integration and cleaning. The inconsistency of data
                 caused by data errors, term variations and missing
                 values has led to the need for approximate string joins
                 (ASJ). In this paper, we study ASJ with abbreviations,
                 which are a frequent type of term variation. Although
                 prior works have studied ASJ given a user-inputted
                 dictionary of synonym rules, they have three common
                 limitations. First, they suffer from low precision in
                 the presence of abbreviations having multiple full
                 forms. Second, their join algorithms are not scalable
                 due to the exponential time complexity. Third, the
                 dictionary may not exist since abbreviations are highly
                 domain-dependent. We propose an end-to-end workflow to
                 address these limitations. There are three main
                 components in the workflow: (1) a new similarity
                 measure taking abbreviations into account that can
                 handle abbreviations having multiple full forms, (2) an
                 efficient join algorithm following the
                 filter-verification framework and (3) an unsupervised
                 approach to learn a dictionary of abbreviation rules
                 from input strings. We evaluate our workflow on four
                 real-world datasets and show that our workflow outputs
                 accurate join results, scales well as input size grows
                 and greatly outperforms state-of-the-art approaches in
                 both accuracy and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nguyen:2017:QDF,
  author =       "Dat Ba Nguyen and Abdalghani Abujabal and Nam Khanh
                 Tran and Martin Theobald and Gerhard Weikum",
  title =        "Query-driven on-the-fly knowledge base construction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "66--79",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today's openly available knowledge bases, such as
                 DBpedia, Yago, Wikidata or Freebase, capture billions
                 of facts about the world's entities. However, even the
                 largest among these (i) are still limited in up-to-date
                 coverage of what happens in the real world, and (ii)
                 miss out on many relevant predicates that precisely
                 capture the wide variety of relationships among
                 entities. To overcome both of these limitations, we
                 propose a novel approach to build on-the-fly knowledge
                 bases in a query-driven manner. Our system, called
                 QKBfly, supports analysts and journalists as well as
                 question answering on emerging topics, by dynamically
                 acquiring relevant facts as timely and comprehensively
                 as possible. QKBfly is based on a semantic-graph
                 representation of sentences, by which we perform three
                 key IE tasks, namely named-entity disambiguation,
                 co-reference resolution and relation extraction, in a
                 light-weight and integrated manner. In contrast to Open
                 IE, our output is canonicalized. In contrast to
                 traditional IE, we capture more predicates, including
                 ternary and higher-arity ones. Our experiments
                 demonstrate that QKBfly can build high-quality,
                 on-the-fly knowledge bases that can readily be
                 deployed, e.g., for the task of ad-hoc question
                 answering.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Poppe:2017:GGB,
  author =       "Olga Poppe and Chuan Lei and Elke A. Rundensteiner and
                 David Maier",
  title =        "{GRETA}: graph-based real-time event trend
                 aggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "80--92",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Streaming applications from algorithmic trading to
                 traffic management deploy Kleene patterns to detect and
                 aggregate arbitrarily-long event sequences, called
                 event trends. State-of-the-art systems process such
                 queries in two steps. Namely, they first construct all
                 trends and then aggregate them. Due to the exponential
                 costs of trend construction, this two-step approach
                 suffers from both long delays and high memory costs.
                 To overcome these limitations, we propose the
                 Graph-based Real-time Event Trend Aggregation (GRETA)
                 approach that dynamically computes event trend
                 aggregation without first constructing these trends. We
                 define the GRETA graph to compactly encode all trends.
                 Our GRETA runtime incrementally maintains the graph,
                 while dynamically propagating aggregates along its
                 edges. Based on the graph, the final aggregate is
                 incrementally updated and instantaneously returned at
                 the end of each query window. Our GRETA runtime
                 represents a win-win solution, reducing both the time
                 complexity from exponential to quadratic and the space
                 complexity from exponential to linear in the number of
                 events. Our experiments demonstrate that GRETA achieves
                 up to four orders of magnitude speed-up and up to
                 50-fold memory reduction compared to the
                 state-of-the-art two-step approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Guo:2017:PPP,
  author =       "Wentian Guo and Yuchen Li and Mo Sha and Kian-Lee
                 Tan",
  title =        "Parallel {Personalized PageRank} on dynamic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "93--106",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Personalized PageRank (PPR) is a well-known proximity
                 measure in graphs. To meet the need for dynamic PPR
                 maintenance, recent works have proposed a local update
                 scheme to support incremental computation.
                 Nevertheless, sequential execution of the scheme is
                 still too slow for high-speed stream processing.
                 Therefore, we are motivated to design a parallel
                 approach for dynamic PPR computation. First, as updates
                 always come in batches, we devise a batch processing
                 method to reduce synchronization cost among every
                 single update and enable more parallelism for iterative
                 parallel execution. Our theoretical analysis shows that
                 the parallel approach has the same asymptotic
                 complexity as the sequential approach. Second, we
                 devise novel optimization techniques to effectively
                 reduce runtime overheads for parallel processes.
                 Experimental evaluation shows that our parallel
                 algorithm can achieve orders of magnitude speedups on
                 GPUs and multi-core CPUs compared with the
                 state-of-the-art sequential algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sha:2017:ADG,
  author =       "Mo Sha and Yuchen Li and Bingsheng He and Kian-Lee
                 Tan",
  title =        "Accelerating dynamic graph analytics on {GPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "1",
  pages =        "107--120",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Oct 10 17:16:21 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As graph analytics often involves compute-intensive
                 operations, GPUs have been extensively used to
                 accelerate the processing. However, in many
                 applications such as social networks, cyber security,
                 and fraud detection, their representative graphs evolve
                 frequently and one has to perform a rebuild of the
                 graph structure on GPUs to incorporate the updates.
                 Hence, rebuilding the graphs becomes the bottleneck of
                 processing high-speed graph streams. In this paper, we
                 propose a GPU-based dynamic graph storage scheme to
                 support existing graph algorithms easily. Furthermore,
                 we propose parallel update algorithms to support
                 efficient stream updates so that the maintained graph
                 is immediately available for high-speed analytic
                 processing on GPUs. Our extensive experiments with
                 three streaming applications on large-scale real and
                 synthetic datasets demonstrate the superior performance
                 of our proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Appuswamy:2017:AIS,
  author =       "Raja Appuswamy and Angelos C. Anadiotis and Danica
                 Porobic and Mustafa K. Iman and Anastasia Ailamaki",
  title =        "Analyzing the impact of system architecture on the
                 scalability of {OLTP} engines for high-contention
                 workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "121--134",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149194",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Main-memory OLTP engines are being increasingly
                 deployed on multicore servers that provide abundant
                 thread-level parallelism. However, recent research has
                 shown that even the state-of-the-art OLTP engines are
                 unable to exploit available parallelism for high
                 contention workloads. While previous studies have shown
                 the lack of scalability of all popular concurrency
                 control protocols, they consider only one system
                 architecture---a non-partitioned, shared-everything one
                 where transactions can be scheduled to run on any core
                 and can access any data or metadata stored in shared
                 memory. In this paper, we perform a thorough analysis
                 of the impact of other architectural alternatives
                 (Data-oriented transaction execution, Partitioned
                 Serial Execution, and Delegation) on scalability under
                 high contention scenarios. In doing so, we present
                 Trireme, a main-memory OLTP engine testbed that
                 implements four system architectures and several
                 popular concurrency control protocols in a single code
                 base. Using Trireme, we present an extensive
                 experimental study to understand (i) the impact of each
                 system architecture on overall scalability, (ii) the
                 interaction between system architecture and concurrency
                 control protocols, and (iii) the pros and cons of new
                 architectures that have been proposed recently to
                 explicitly deal with high-contention workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jung:2017:SDL,
  author =       "Hyungsoo Jung and Hyuck Han and Sooyong Kang",
  title =        "Scalable database logging for multicores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "135--148",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149195",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern databases, guaranteeing atomicity and
                 durability, store transaction logs in a volatile,
                 central log buffer and then flush the log buffer to
                 non-volatile storage by the write-ahead logging
                 principle. Buffering logs in central log store has
                 recently faced a severe multicore scalability problem,
                 and log flushing has been challenged by synchronous I/O
                 delay. We have designed and implemented a fast and
                 scalable logging method, Eleda, that can migrate a
                 surge of transaction logs from volatile memory to
                 stable storage without risking durable transaction
                 atomicity. Our efficient implementation of Eleda is
                 enabled by a highly concurrent data structure,
                 Grasshopper, that eliminates a multicore scalability
                 problem of centralized logging and enhances system
                 utilization in the presence of synchronous I/O delay.
                 We implemented Eleda and plugged it to WiredTiger and
                 Shore-MT by replacing their log managers. Our
                 evaluation showed that Eleda-based transaction systems
                 improve performance up to $ 71 \times $, thus showing
                 the applicability of Eleda.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bonifati:2017:ASL,
  author =       "Angela Bonifati and Wim Martens and Thomas Timm",
  title =        "An analytical study of large {SPARQL} query logs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "149--161",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149196",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the adoption of RDF as the data model for Linked
                 Data and the Semantic Web, query specification from
                 end-users has become more and more common in SPARQL
                 endpoints. In this paper, we conduct an in-depth
                 analytical study of the queries formulated by end-users
                 and harvested from large and up-to-date query logs from
                 a wide variety of RDF data sources. As opposed to
                 previous studies, ours is the first assessment on a
                 voluminous query corpus, spanning over several years
                 and covering many representative SPARQL endpoints.
                 Apart from the syntactical structure of the queries,
                 that exhibits already interesting results on this
                 generalized corpus, we drill deeper in the structural
                 characteristics related to the graph and hypergraph
                 representation of queries. We outline the most common
                 shapes of queries when visually displayed as undirected
                 graphs, and characterize their (hyper-)tree width.
                 Moreover, we analyze the evolution of queries over
                 time, by introducing the novel concept of a streak,
                 i.e., a sequence of queries that appear as subsequent
                 modifications of a seed query. Our study offers several
                 fresh insights on the already rich query features of
                 real SPARQL queries formulated by real users, and
                 brings us to draw a number of conclusions and pinpoint
                 future directions for SPARQL query evaluation, query
                 optimization, tuning, and benchmarking.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2017:ACT,
  author =       "Pinghui Wang and Yiyan Qi and Yu Sun and Xiangliang
                 Zhang and Jing Tao and Xiaohong Guan",
  title =        "Approximately counting triangles in large graph
                 streams including edge duplicates with a fixed memory
                 usage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "162--175",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149197",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Counting triangles in a large graph is important for
                 detecting network anomalies such as spam web pages and
                 suspicious accounts (e.g., fraudsters and advertisers)
                 on online social networks. However, it is challenging
                 to compute the number of triangles in a large graph
                 represented as a stream of edges with a low
                 computational cost when given a limited memory.
                 Recently, several effective sampling-based
                 approximation methods have been developed to solve this
                 problem. However, they assume the graph stream of
                 interest contains no duplicate edges, which does not
                 hold in many real-world graph streams (e.g., phone
                 calling networks). In this paper, we observe that these
                 methods exhibit a large estimation error or
                 computational cost even when modified to deal with
                 duplicate edges using deduplication techniques such as
                 Bloom filter and hash-based sampling. To solve this
                 challenge, we design a one-pass streaming algorithm for
                 uniformly sampling distinct edges at a high speed.
                 Compared to state-of-the-art algorithms, our algorithm
                 reduces the sampling cost per edge from $ O(\log k) $
                 ($ k $ is the maximum number of sampled edges
                 determined by the available memory space) to $ O(1) $
                 without using any
                 additional memory space. Based on sampled edges, we
                 develop a simple yet accurate method to infer the
                 number of triangles in the original graph stream. We
                 conduct extensive experiments on a variety of
                 real-world large graphs, and the results demonstrate
                 that our method is several times more accurate and
                 faster than state-of-the-art methods with the same
                 memory usage.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qiao:2017:SMC,
  author =       "Miao Qiao and Hao Zhang and Hong Cheng",
  title =        "Subgraph matching: on compression and computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "176--188",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149198",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Subgraph matching finds a set I of all occurrences of
                 a pattern graph in a target graph. It has a wide range
                 of applications while suffers an expensive computation.
                 This efficiency issue has been studied extensively. All
                 existing approaches, however, turn a blind eye to the
                 output crisis, that is, when the system has to
                 materialize I as a preprocessing/intermediate/final
                 result or an index, the cost of the export of I
                 dominates the overall cost, which could be prohibitive
                 even for a small pattern graph. This paper studies
                 subgraph matching via two problems. (1) Is there an
                 ideal compression of I? (2) Will the compression of I
                 reversely boost the computation of I? For the problem
                 (1), we propose a technique called VCBC to compress I
                 to code(I) which serves effectively the same as I. For
                 problem (2), we propose a subgraph matching computation
                 framework CBF which computes code(I) instead of I to
                 bring down the output cost. CBF further reduces the
                 overall cost by reducing the intermediate results.
                 Extensive experiments show that the compression ratio
                 of VCBC can be up to $ 10^5 $ which also significantly
                 lowers the output cost of CBF. Extensive experiments
                 show the superior performance of CBF over existing
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Singh:2017:SEM,
  author =       "Rohit Singh and Venkata Vamsikrishna Meduri and Ahmed
                 Elmagarmid and Samuel Madden and Paolo Papotti and
                 Jorge-Arnulfo Quian{\'e}-Ruiz and Armando Solar-Lezama
                 and Nan Tang",
  title =        "Synthesizing entity matching rules by examples",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "189--202",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149199",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity matching (EM) is a critical part of data
                 integration. We study how to synthesize entity matching
                 rules from positive-negative matching examples. The
                 core of our solution is program synthesis, a powerful
                 tool to automatically generate rules (or programs) that
                 satisfy a given high-level specification, via a
                 predefined grammar. This grammar describes a General
                 Boolean Formula (GBF) that can include arbitrary
                 attribute matching predicates combined by conjunctions
                 ($ \wedge $), disjunctions ($ \vee $) and negations ($
                 \neg $), and is expressive enough to model EM
                 problems, from capturing arbitrary attribute
                 combinations to handling missing attribute values. The
                 rules in the form of GBF are more concise than
                 traditional EM rules represented in Disjunctive Normal
                 Form ( DNF ). Consequently, they are more interpretable
                 than decision trees and other machine learning
                 algorithms that output deep trees with many branches.
                 We present a new synthesis algorithm that, given only
                 positive-negative examples as input, synthesizes EM
                 rules that are effective over the entire dataset.
                 Extensive experiments show that we outperform other
                 interpretable rules (e.g., decision trees with low
                 depth) in effectiveness, and are comparable with
                 non-interpretable tools (e.g., decision trees with high
                 depth, gradient-boosting trees, random forests and
                 SVM).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2017:SST,
  author =       "Liang He and Bin Shao and Yatao Li and Huanhuan Xia
                 and Yanghua Xiao and Enhong Chen and Liang Jeff Chen",
  title =        "{Stylus}: a strongly-typed store for serving massive
                 {RDF} data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "203--216",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149200",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "RDF is one of the most commonly used knowledge
                 representation forms. Many highly influential knowledge
                 bases, such as Freebase and PubChemRDF, are in RDF
                 format. An RDF data set is usually represented as a
                 collection of subject-predicate-object triples. Despite
                 the flexibility of RDF triples, it is challenging to
                 serve SPARQL queries on RDF data efficiently by
                 directly managing triples due to the following two
                 reasons. First, heavy joins on a large number of
                 triples are needed for query processing, resulting in a
                 large number of data scans and large redundant
                 intermediate results; Second, weakly-typed triple
                 representation provides suboptimal random access ---
                 typically with logarithmic complexity. This data access
                 challenge, unfortunately, cannot be easily met by a
                 better query optimizer as large graph processing is
                 extremely I/O-intensive. In this paper, we argue that
                 strongly-typed graph representation is the key to
                 high-performance RDF query processing. We propose
                 Stylus --- a strongly-typed store for serving massive
                 RDF data. Stylus exploits a strongly-typed storage
                 scheme to boost the performance of RDF query
                 processing. The storage scheme is essentially a
                 materialized join view on entities, it thus can
                 eliminate a large number of unnecessary joins on
                 triples. Moreover, it is equipped with a compact
                 representation for intermediate results and an
                 efficient graph-decomposition based query planner.
                 Experimental results on both synthetic and real-life
                 RDF data sets confirm that the proposed approach can
                 dramatically boost the performance of SPARQL query
                 processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ioannou:2017:HQE,
  author =       "Ekaterini Ioannou and Minos Garofalakis",
  title =        "Holistic query evaluation over information extraction
                 pipelines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "217--229",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149201",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce holistic in-database query processing
                 over information extraction pipelines. This requires
                 considering the joint conditional distribution over
                 generic Conditional Random Fields that uses factor
                 graphs to encode extraction tasks. Our approach
                 introduces Canopy Factor Graphs, a novel probabilistic
                 model for effectively capturing the joint conditional
                 distribution given a canopy clustering of the data, and
                 special query operators for retrieving resolution
                 information. Since inference on such models is
                 intractable, we introduce an approximate technique for
                 query processing and optimizations that cut across the
                 integrated tasks for reducing the required processing
                 time. Effectiveness and scalability are verified
                 through an extensive experimental evaluation using real
                 and synthetic data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Psaropoulos:2017:ICP,
  author =       "Georgios Psaropoulos and Thomas Legler and Norman May
                 and Anastasia Ailamaki",
  title =        "Interleaving with coroutines: a practical approach for
                 robust index joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "2",
  pages =        "230--242",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3149193.3149202",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 30 06:16:03 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Index join performance is determined by the efficiency
                 of the lookup operation on the involved index. Although
                 database indexes are highly optimized to leverage
                 processor caches, main memory accesses inevitably
                 increase lookup runtime when the index outsizes the
                 last-level cache; hence, index join performance drops.
                 Still, robust index join performance becomes possible
                 with instruction stream interleaving: given a group of
                 lookups, we can hide cache misses in one lookup with
                 instructions from other lookups by switching among
                 their respective instruction streams upon a cache miss.
                 In this paper, we propose interleaving with coroutines
                 for any type of index join. We showcase our proposal on
                 SAP HANA by implementing binary search and CSB$^+$-tree
                 traversal for an instance of index join related to
                 dictionary compression. Coroutine implementations not
                 only perform similarly to prior interleaving
                 techniques, but also resemble the original code
                 closely, while supporting both interleaved and
                 non-interleaved execution. Thus, we claim that
                 coroutines make interleaving practical for use in real
                 DBMS codebases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wen:2017:ESG,
  author =       "Dong Wen and Lu Qin and Ying Zhang and Lijun Chang and
                 Xuemin Lin",
  title =        "Efficient structural graph clustering: an index-based
                 approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "243--255",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157795",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph clustering is a fundamental problem widely
                 experienced across many industries. The structural
                 graph clustering (SCAN) method obtains not only
                 clusters but also hubs and outliers. However, the
                 clustering results closely depend on two sensitive
                 parameters, $ \epsilon $ and $ \mu $, while the optimal
                 parameter setting depends on different graph properties
                 and various user requirements. Moreover, all existing
                 SCAN solutions need to scan at least the whole graph,
                 even if only a small number of vertices belong to
                 clusters. In this paper we propose an index-based
                 method for SCAN. Based on our index, we cluster the
                 graph for any $ \epsilon $ and $ \mu $ in $ O(\sum_{C
                 \in {\cal C}} |E_C|) $ time, where $ {\cal C} $ is the
                 result set of all clusters and $ | E_C | $ is the
                 number of edges in a specific cluster $ C \in {\cal C}
                 $. In other words, the time
                 expended to compute structural clustering depends only
                 on the result size, not on the size of the original
                 graph. Our index's space complexity is bounded by $
                 O(m)$, where $m$ is the number of edges in the graph.
                 To handle dynamic graph updates, we propose algorithms
                 and several optimization techniques for maintaining our
                 index. We conduct extensive experiments to practically
                 evaluate the performance of all our proposed algorithms
                 on 10 real-world networks, one of which contains more
                 than 1 billion edges. The experimental results
                 demonstrate that our approaches significantly
                 outperform existing solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{DeCapitanidiVimercati:2017:AMM,
  author =       "Sabrina {De Capitani di Vimercati} and Sara Foresti
                 and Sushil Jajodia and Giovanni Livraga and Stefano
                 Paraboschi and Pierangela Samarati",
  title =        "An authorization model for multi provider queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "256--268",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157796",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a novel approach for the specification and
                 enforcement of authorizations that enables controlled
                 data sharing for collaborative queries in the cloud.
                 Data authorities can establish authorizations
                 regulating access to their data distinguishing three
                 visibility levels (no visibility, encrypted visibility,
                 and plaintext visibility). Authorizations are enforced
                 in the query execution by possibly restricting
                 operation assignments to other parties and by adjusting
                 visibility of data on-the-fly. Our approach enables
                 users and data authorities to fully enjoy the benefits
                 and economic savings of the competitive open cloud
                 market, while maintaining control over data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ratner:2017:SRT,
  author =       "Alexander Ratner and Stephen H. Bach and Henry
                 Ehrenberg and Jason Fries and Sen Wu and Christopher
                 R{\'e}",
  title =        "{Snorkel}: rapid training data creation with weak
                 supervision",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "269--282",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157797",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Labeling training data is increasingly the largest
                 bottleneck in deploying machine learning systems. We
                 present Snorkel, a first-of-its-kind system that
                 enables users to train state-of-the-art models without
                 hand labeling any training data. Instead, users write
                 labeling functions that express arbitrary heuristics,
                 which can have unknown accuracies and correlations.
                 Snorkel denoises their outputs without access to ground
                 truth by incorporating the first end-to-end
                 implementation of our recently proposed machine
                 learning paradigm, data programming. We present a
                 flexible interface layer for writing labeling functions
                 based on our experience over the past year
                 collaborating with companies, agencies, and research
                 labs. In a user study, subject matter experts build
                 models $ 2.8 \times $ faster and increase predictive
                 performance an average 45.5\% versus seven hours of
                 hand labeling. We study the modeling tradeoffs in this
                 new setting and propose an optimizer for automating
                 tradeoff decisions that gives up to $ 1.8 \times $
                 speedup per pipeline execution. In two collaborations,
                 with the U.S. Department of Veterans Affairs and the
                 U.S. Food and Drug Administration, and on four
                 open-source text and image data sets representative of
                 other deployments, Snorkel provides 132\% average
                 improvements to predictive performance over prior
                 heuristic approaches and comes within an average 3.60\%
                 of the predictive performance of large hand-curated
                 training sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:VPV,
  author =       "Yuliang Li and Alin Deutsch and Victor Vianu",
  title =        "{VERIFAS}: a practical verifier for artifact systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "283--296",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157798",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data-driven workflows, of which IBM's Business
                 Artifacts are a prime exponent, have been successfully
                 deployed in practice, adopted in industrial standards,
                 and have spawned a rich body of research in academia,
                 focused primarily on static analysis. The present
                 research bridges the gap between the theory and
                 practice of artifact verification with VERIFAS, the
                 first implementation of practical significance of an
                 artifact verifier with full support for unbounded data.
                 VERIFAS verifies within seconds linear-time temporal
                 properties over real-world and synthetic workflows of
                 complexity in the range recommended by software
                 engineering practice. Compared to our previous
                 implementation based on the widely-used Spin model
                 checker, VERIFAS not only supports a model with richer
                 data manipulations but also outperforms it by over an
                 order of magnitude. VERIFAS' good performance is due to
                 a novel symbolic representation approach and a family
                 of specialized optimizations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jia:2017:DMG,
  author =       "Zhihao Jia and Yongkee Kwon and Galen Shipman and Pat
                 McCormick and Mattan Erez and Alex Aiken",
  title =        "A distributed multi-{GPU} system for fast graph
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "297--310",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157799",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present Lux, a distributed multi-GPU system that
                 achieves fast graph processing by exploiting the
                 aggregate memory bandwidth of multiple GPUs and taking
                 advantage of locality in the memory hierarchy of
                 multi-GPU clusters. Lux provides two execution models
                 that optimize algorithmic efficiency and enable
                 important GPU optimizations, respectively. Lux also
                 uses a novel dynamic load balancing strategy that is
                 cheap and achieves good load balance across GPUs. In
                 addition, we present a performance model that
                 quantitatively predicts the execution times and
                 automatically selects the runtime configurations for
                 Lux applications. Experiments show that Lux achieves up
                 to 20X speedup over state-of-the-art shared memory
                 systems and up to two orders of magnitude speedup over
                 distributed systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bleifuss:2017:EDC,
  author =       "Tobias Bleifu{\ss} and Sebastian Kruse and Felix
                 Naumann",
  title =        "Efficient denial constraint discovery with {Hydra}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "311--323",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157800",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Denial constraints (DCs) are a generalization of many
                 other integrity constraints (ICs) widely used in
                 databases, such as key constraints, functional
                 dependencies, or order dependencies. Therefore, they
                 can serve as a unified reasoning framework for all of
                 these ICs and express business rules that cannot be
                 expressed by the more restrictive IC types. The process
                 of formulating DCs by hand is difficult, because it
                 requires not only domain expertise but also database
                 knowledge, and due to DCs' inherent complexity, this
                 process is tedious and error-prone. Hence, an automatic
                 DC discovery is highly desirable: we search for all
                 valid denial constraints in a given database instance.
                 However, due to the large search space, the problem of
                 DC discovery is computationally expensive. We propose a
                 new algorithm Hydra, which overcomes the quadratic
                 runtime complexity in the number of tuples of
                 state-of-the-art DC discovery methods. The new
                 algorithm's experimentally determined runtime grows
                 only linearly in the number of tuples. This results in
                 a speedup by orders of magnitude, especially for
                 datasets with a large number of tuples. Hydra can
                 deliver results in a matter of seconds that to date
                 took hours to compute.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Azim:2017:RRC,
  author =       "Tahir Azim and Manos Karpathiotakis and Anastasia
                 Ailamaki",
  title =        "{ReCache}: reactive caching for fast analytics over
                 heterogeneous data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "324--337",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157801",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As data continues to be generated at exponentially
                 growing rates in heterogeneous formats, fast analytics
                 to extract meaningful information is becoming
                 increasingly important. Systems widely use in-memory
                 caching as one of their primary techniques to speed up
                 data analytics. However, caches in data analytics
                 systems cannot rely on simple caching policies and a
                 fixed data layout to achieve good performance.
                 Different datasets and workloads require different
                 layouts and policies to achieve optimal performance.
                 This paper presents ReCache, a cache-based performance
                 accelerator that is reactive to the cost and
                 heterogeneity of diverse raw data formats. Using timing
                 measurements of caching operations and selection
                 operators in a query plan, ReCache accounts for the
                 widely varying costs of reading, parsing, and caching
                 data in nested and tabular formats. Combining these
                 measurements with information about frequently accessed
                 data fields in the workload, ReCache automatically
                 decides whether a nested or relational column-oriented
                 layout would lead to better query performance.
                 Furthermore, ReCache keeps track of commonly utilized
                 operators to make informed cache admission and eviction
                 decisions. Experiments on synthetic and real-world
                 datasets show that our caching techniques decrease
                 caching overhead for individual queries by an average
                 of 59\%. Furthermore, over the entire workload, ReCache
                 reduces execution time by 19--75\% compared to existing
                 techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yuan:2017:EED,
  author =       "Long Yuan and Lu Qin and Xuemin Lin and Lijun Chang
                 and Wenjie Zhang",
  title =        "Effective and efficient dynamic graph coloring",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "338--351",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157802",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph coloring is a fundamental graph problem that is
                 widely applied in a variety of applications. The aim of
                 graph coloring is to minimize the number of colors used
                 to color the vertices in a graph such that no two
                 incident vertices have the same color. Existing
                 solutions for graph coloring mainly focus on computing
                 a good coloring for a static graph. However, since many
                 real-world graphs are highly dynamic, in this paper, we
                 aim to incrementally maintain the graph coloring when
                 the graph is dynamically updated. We target on two
                 goals: high effectiveness and high efficiency. To
                 achieve high effectiveness, we maintain the graph
                 coloring in a way such that the coloring result is
                 consistent with one of the best static graph coloring
                 algorithms for large graphs. To achieve high
                 efficiency, we investigate efficient incremental
                 algorithms to update the graph coloring by exploring a
                 small number of vertices. We design a color-propagation
                 based algorithm which only explores the vertices within
                 the 2-hop neighbors of the update-related and
                 color-changed vertices. We then propose a novel color
                 index to maintain some summary color information and,
                 thus, bound the explored vertices within the neighbors
                 of these vertices. Moreover, we derive some effective
                 pruning rules to further reduce the number of
                 propagated vertices. The experimental results
                 demonstrate the high effectiveness and efficiency of
                 our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zacharatou:2017:GRR,
  author =       "Eleni Tzirita Zacharatou and Harish Doraiswamy and
                 Anastasia Ailamaki and Cl{\'a}udio T. Silva and Juliana
                 Freire",
  title =        "{GPU} rasterization for real-time spatial aggregation
                 over arbitrary polygons",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "352--365",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157803",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Visual exploration of spatial data relies heavily on
                 spatial aggregation queries that slice and summarize
                 the data over different regions. These queries comprise
                 computationally-intensive point-in-polygon tests that
                 associate data points to polygonal regions, challenging
                 the responsiveness of visualization tools. This
                 challenge is compounded by the sheer amounts of data,
                 requiring a large number of such tests to be performed.
                 Traditional pre-aggregation approaches are unsuitable
                 in this setting since they fix the query constraints
                 and support only rectangular regions. On the other
                 hand, query constraints are defined interactively in
                 visual analytics systems, and polygons can be of
                 arbitrary shapes. In this paper, we convert a spatial
                 aggregation query into a set of drawing operations on a
                 canvas and leverage the rendering pipeline of the
                 graphics hardware (GPU) to enable interactive response
                 times. Our technique trades-off accuracy for response
                 time by adjusting the canvas resolution, and can even
                 provide accurate results when combined with a polygon
                 index. We evaluate our technique on two large
                 real-world data sets, exhibiting superior performance
                 compared to index-based approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shah:2017:KFK,
  author =       "Vraj Shah and Arun Kumar and Xiaojin Zhu",
  title =        "Are key--foreign key joins safe to avoid when learning
                 high-capacity classifiers?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "366--379",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157804",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine learning (ML) over relational data is a
                 booming area of data management. While there is a lot
                 of work on scalable and fast ML systems, little work
                 has addressed the pains of sourcing data for ML tasks.
                 Real-world relational databases typically have many
                 tables (often, dozens) and data scientists often
                 struggle to even obtain all tables for joins before ML.
                 In this context, Kumar et al. showed recently that
                 key-foreign key dependencies (KFKDs) between tables
                 often lets us avoid such joins without significantly
                 affecting prediction accuracy --- an idea they called
                 ``avoiding joins safely.'' While initially
                 controversial, this idea has since been used by
                 multiple companies to reduce the burden of data
                 sourcing for ML. But their work applied only to linear
                 classifiers. In this work, we verify if their results
                 hold for three popular high-capacity classifiers:
                 decision trees, non-linear SVMs, and ANNs. We conduct
                 an extensive experimental study using both real-world
                 datasets and simulations to analyze the effects of
                 avoiding KFK joins on such models. Our results show
                 that these high-capacity classifiers are surprisingly
                 and counter-intuitively more robust to avoiding KFK
                 joins compared to linear classifiers, refuting an
                 intuition from the prior work's analysis. We explain
                 this behavior intuitively and identify open questions
                 at the intersection of data management and ML
                 theoretical research. All of our code and datasets are
                 available for download from
                 http://cseweb.ucsd.edu/~arunkk/hamlet.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2017:WRC,
  author =       "Zheng Liu and Lei Chen",
  title =        "Worker recommendation for crowdsourced {Q\&A}
                 services: a triple-factor aware approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "3",
  pages =        "380--392",
  month =        nov,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3157794.3157805",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Dec 11 16:07:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Worker Recommendation (WR) is one of the most
                 important functions for crowdsourced Q\&A services.
                 Specifically, given a set of tasks to be solved, WR
                 recommends each task with a certain group of workers,
                 who are expected to give timely answers with high
                 qualities. To address the WR problem, recent studies
                 have introduced a number of recommendation approaches,
                 which take advantage of workers' expertises or
                 preferences towards different types of tasks. However,
                 without a thorough consideration of workers'
                 characters, such approaches will lead to either
                 inadequate task fulfillment or inferior answer quality.
                 In this work, we propose the Triple-factor Aware Worker
                 Recommendation framework, which collectively considers
                 workers' expertises, preferences and activenesses to
                 maximize the overall production of high quality
                 answers. We construct the Latent Hierarchical
                 Factorization Model, which is able to infer the tasks'
                 underlying categories and workers' latent characters
                 from the historical data; and we propose a novel
                 parameter inference method, which only requires the
                 processing of positive instances, giving rise to
                 significantly higher time efficiency and better
                 inference quality. What's more, the sampling-based
                 recommendation algorithm is developed, such that the
                 near optimal worker recommendation can be generated for
                 a presented batch of tasks with considerably reduced
                 time consumption. Comprehensive experiments have been
                 carried out using both real and synthetic datasets,
                 whose results verify the effectiveness and efficiency
                 of our proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gong:2017:CSD,
  author =       "Shufeng Gong and Yanfeng Zhang and Ge Yu",
  title =        "Clustering stream data by exploring the evolution of
                 density mountain",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "393--405",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164136",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream clustering is a fundamental problem in many
                 streaming data analysis applications. Comparing to
                 classical batch-mode clustering, there are two key
                 challenges in stream clustering: (i) Given that input
                 data are changing continuously, how to incrementally
                 update their clustering results efficiently? (ii) Given
                 that clusters continuously evolve with the evolution of
                 data, how to capture the cluster evolution activities?
                 Unfortunately, most of existing stream clustering
                 algorithms can neither update the cluster result in
                 real-time nor track the evolution of clusters. In this
                 paper, we propose a stream clustering algorithm
                 EDMStream by exploring the Evolution of Density
                 Mountain. The density mountain is used to abstract the
                 data distribution, the changes of which indicate data
                 distribution evolution. We track the evolution of
                 clusters by monitoring the changes of density
                 mountains. We further provide efficient data structures
                 and filtering schemes to ensure that the update of
                 density mountains is in real-time, which makes online
                 clustering possible. The experimental results on
                 synthetic and real datasets show that, comparing to the
                 state-of-the-art stream clustering algorithms, e.g.,
                 D-Stream, DenStream, DBSTREAM and MR-Stream, our
                 algorithm is able to respond to a cluster update much
                 faster (say 7--15x faster than the best of the
                 competitors) and at the same time achieve comparable
                 cluster quality. Furthermore, EDMStream successfully
                 captures the cluster evolution activities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2017:QFL,
  author =       "Tianzheng Wang and Ryan Johnson and Ippokratis
                 Pandis",
  title =        "Query fresh: log shipping on steroids",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "406--419",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164137",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Hot standby systems often have to trade safety (i.e.,
                 not losing committed work) and freshness (i.e., having
                 access to recent updates) for performance. Guaranteeing
                 safety requires synchronous log shipping that blocks
                 the primary until the log records are durably
                 replicated in one or multiple backups; maintaining
                 freshness necessitates fast log replay on backups, but
                 is often defeated by the dual-copy architecture and
                 serial replay: a backup must generate the ``real'' data
                 from the log to make recent updates accessible to
                 read-only queries. This paper proposes Query Fresh, a
                 hot standby system that provides both safety and
                 freshness while maintaining high performance on the
                 primary. The crux is an append-only storage
                 architecture used in conjunction with fast networks
                 (e.g., InfiniBand) and byte-addressable, non-volatile
                 memory (NVRAM). Query Fresh avoids the dual-copy design
                 and treats the log as the database, enabling
                 lightweight, parallel log replay that does not block
                 the primary. Experimental results using the TPC-C
                 benchmark show that under Query Fresh, backup servers
                 can replay log records faster than they are generated
                 by the primary server, using one quarter of the
                 available compute resources. With a 56Gbps network,
                 Query Fresh can support up to 4--5 synchronous
                 replicas, each of which receives and replays $ \approx
                 $1.4GB of log records per second, with up to 4--6\%
                 overhead on the primary compared to a standalone server
                 that achieves 620kTPS without replication.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sahu:2017:ULG,
  author =       "Siddhartha Sahu and Amine Mhedhbi and Semih Salihoglu
                 and Jimmy Lin and M. Tamer {\"O}zsu",
  title =        "The ubiquity of large graphs and surprising challenges
                 of graph processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "420--431",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164139",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph processing is becoming increasingly prevalent
                 across many application domains. In spite of this
                 prevalence, there is little research about how graphs
                 are actually used in practice. We conducted an online
                 survey aimed at understanding: (i) the types of graphs
                 users have; (ii) the graph computations users run;
                 (iii) the types of graph software users use; and (iv)
                 the major challenges users face when processing their
                 graphs. We describe the participants' responses to our
                 questions highlighting common patterns and challenges.
                 We further reviewed user feedback in the mailing lists,
                 bug reports, and feature requests in the source
                 repositories of a large suite of software products for
                 processing graphs. Through our review, we were able to
                 answer some new questions that were raised by
                 participants' responses and identify specific
                 challenges that users face when using different classes
                 of graph software. The participants' responses and data
                 we obtained revealed surprising facts about graph
                 processing in practice. In particular, real-world
                 graphs represent a very diverse range of entities and
                 are often very large, and scalability and visualization
                 are undeniably the most pressing challenges faced by
                 participants. We hope these findings can guide future
                 research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ramachandra:2017:FOI,
  author =       "Karthik Ramachandra and Kwanghyun Park and K.
                 Venkatesh Emani and Alan Halverson and C{\'e}sar
                 Galindo-Legaria and Conor Cunningham",
  title =        "{Froid}: optimization of imperative programs in a
                 relational database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "432--444",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164140",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "For decades, RDBMSs have supported declarative SQL as
                 well as imperative functions and procedures as ways for
                 users to express data processing tasks. While the
                 evaluation of declarative SQL has received a lot of
                 attention resulting in highly sophisticated techniques,
                 the evaluation of imperative programs has remained
                 na{\"\i}ve and highly inefficient. Imperative programs
                 offer several benefits over SQL and hence are often
                 preferred and widely used. But unfortunately, their
                 abysmal performance discourages, and even prohibits
                 their use in many situations. We address this important
                 problem that has hitherto received little attention. We
                 present Froid, an extensible framework for optimizing
                 imperative programs in relational databases. Froid's
                 novel approach automatically transforms entire User
                 Defined Functions (UDFs) into relational algebraic
                 expressions, and embeds them into the calling SQL
                 query. This form is now amenable to cost-based
                 optimization and results in efficient, set-oriented,
                 parallel plans as opposed to inefficient, iterative,
                 serial execution of UDFs. Froid's approach additionally
                 brings the benefits of many compiler optimizations to
                 UDFs with no additional implementation effort. We
                 describe the design of Froid and present our
                 experimental evaluation that demonstrates performance
                 improvements of up to multiple orders of magnitude on
                 real workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2017:ESH,
  author =       "Ye Li and Leong Hou U. and Man Lung Yiu and Ngai Meng
                 Kou",
  title =        "An experimental study on hub labeling based shortest
                 path algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "445--457",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164141",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Shortest path distance retrieval is a core component
                 in many important applications. For a decade, hub
                 labeling (HL) techniques have been considered as a
                 practical solution with fast query response time (e.g.,
                 1--3 orders of magnitude faster), competitive indexing
                 time, and slightly larger storage overhead (e.g.,
                 several times larger). These techniques enhance query
                 throughput up to hundred thousands queries per second,
                 which is particularly helpful in large user
                 environment. Despite the importance of HL techniques,
                 we are not aware of any comprehensive experimental
                 study on HL techniques. Thus it is difficult for a
                 practitioner to adopt HL techniques for her
                 applications. To address the above issues, we provide a
                 comprehensive experimental study on the
                 state-of-the-art HL technique with analysis of their
                 efficiency, effectiveness and applicability. From
                 insightful summary of different HL techniques, we
                 further develop a simple yet effective HL technique
                 called Significant path based Hub Pushing (SHP) which
                 greatly improves indexing time of previous techniques
                 while retaining good query performance. We also
                 complement extensive comparisons between HL techniques
                 and other shortest path solutions to demonstrate
                 robustness and efficiency of HL techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Merritt:2017:CLS,
  author =       "Alexander Merritt and Ada Gavrilovska and Yuan Chen
                 and Dejan Milojicic",
  title =        "Concurrent log-structured memory for many-core
                 key--value stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "458--471",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164142",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Key--value stores are an important tool in managing and
                 accessing large in-memory data sets. As many
                 applications benefit from having as much of their
                 working state fit into main memory, an important design
                 of the memory management of modern key--value stores is
                 the use of log-structured approaches, enabling
                 efficient use of the memory capacity, by compacting
                 objects to avoid fragmented states. However, with the
                 emergence of thousand-core and peta-byte memory
                 platforms (DRAM or future storage-class memories)
                 log-structured designs struggle to scale, preventing
                 parallel applications from exploiting the full
                 capabilities of the hardware: careful coordination is
                 required for background activities (compacting and
                 organizing memory) to remain asynchronous with respect
                 to the use of the interface, and for insertion
                 operations to avoid contending for centralized
                 resources such as the log head and memory pools. In
                 this work, we present the design of a log-structured
                 key--value store called Nibble that incorporates a
                 multi-head log for supporting concurrent writes, a
                 novel distributed epoch mechanism for scalable memory
                 reclamation, and an optimistic concurrency index. We
                 implement Nibble in the Rust language in ca. 4000 lines
                 of code, and evaluate it across a variety of
                 data-serving workloads on a 240-core cache-coherent
                 server. Our measurements show Nibble scales linearly in
                 uniform YCSB workloads, matching competitive
                 non-log-structured key--value stores for
                 write-dominated traces at 50 million operations per second on
                 1 TiB-sized working sets. Our memory analysis shows
                 Nibble is efficient, requiring less than 10\%
                 additional capacity, whereas memory use by
                 non-log-structured key--value store designs may be as
                 high as 2x.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ceccarello:2017:CUG,
  author =       "Matteo Ceccarello and Carlo Fantozzi and Andrea
                 Pietracaprina and Geppino Pucci and Fabio Vandin",
  title =        "Clustering uncertain graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "472--484",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164143",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "An uncertain graph $ G = (V, E, p : E \to (0, 1]) $
                 can be viewed as a probability space whose outcomes
                 (referred to as possible worlds ) are subgraphs of $G$
                 where any edge $ e \in E$ occurs with probability
                 $ p(e)$, independently of the other edges. These graphs
                 naturally arise in many application domains where data
                 management systems are required to cope with
                 uncertainty in interrelated data, such as computational
                 biology, social network analysis, network reliability,
                 and privacy enforcement, among the others. For this
                 reason, it is important to devise fundamental querying
                 and mining primitives for uncertain graphs. This paper
                 contributes to this endeavor with the development of
                 novel strategies for clustering uncertain graphs.
                 Specifically, given an uncertain graph $G$ and an
                 integer $k$, we aim at partitioning its nodes into $k$
                 clusters, each featuring a distinguished center node,
                 so to maximize the minimum/average connection
                 probability of any node to its cluster's center, in a
                 random possible world. We assess the NP-hardness of
                 maximizing the minimum connection probability, even in
                 the presence of an oracle for the connection
                 probabilities, and develop efficient approximation
                 algorithms for both problems and some useful variants.
                 Unlike previous works in the literature, our algorithms
                 feature provable approximation guarantees and are
                 capable to keep the granularity of the returned
                 clustering under control. Our theoretical findings are
                 complemented with several experiments that compare our
                 algorithms against some relevant competitors, with
                 respect to both running-time and quality of the
                 returned clusterings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abdelaziz:2017:LSQ,
  author =       "Ibrahim Abdelaziz and Essam Mansour and Mourad Ouzzani
                 and Ashraf Aboulnaga and Panos Kalnis",
  title =        "{Lusail}: a system for querying linked data at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "485--498",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164144",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The RDF data model allows publishing interlinked RDF
                 datasets, where each dataset is independently
                 maintained and is queryable via a SPARQL endpoint. Many
                 applications would benefit from querying the resulting
                 large, decentralized, geo-distributed graph through a
                 federated SPARQL query processor. A crucial factor for
                 good performance in federated query processing is
                 pushing as much computation as possible to the local
                 endpoints. Surprisingly, existing federated SPARQL
                 engines are not effective at this task since they rely
                 only on schema information. Consequently, they cause
                 unnecessary data retrieval and communication, leading
                 to poor scalability and response time. This paper
                 addresses these limitations and presents Lusail, a
                 scalable and efficient federated SPARQL system for
                 querying large RDF graphs that are geo-distributed on
                 different endpoints. Lusail uses a novel query
                 rewriting algorithm to push computation to the local
                 endpoints by relying on information about the RDF
                 instances and not only the schema. The query rewriting
                 algorithm has the additional advantage of exposing
                 parallelism in query processing, which Lusail exploits
                 through advanced scheduling at query run time. Our
                 experiments on billions of triples of real and
                 synthetic data show that Lusail outperforms
                 state-of-the-art systems by orders of magnitude in
                 terms of scalability and response time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Harmouch:2017:CEE,
  author =       "Hazar Harmouch and Felix Naumann",
  title =        "Cardinality estimation: an experimental survey",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "499--512",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164145",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data preparation and data profiling comprise both
                 basic and complex tasks to analyze a dataset at hand
                 and extract metadata, such as data distributions, key
                 candidates, and functional dependencies. Among the most
                 important types of metadata is the number of distinct
                 values in a column, also known as the zeroth-frequency
                 moment. Cardinality estimation itself has been an
                 active research topic in the past decades due to its
                 many applications. The aim of this paper is to review
                 the literature of cardinality estimation and to present
                 a detailed experimental study of twelve algorithms,
                 scaling far beyond the original experiments. First, we
                 outline and classify approaches to solve the problem of
                 cardinality estimation --- we describe their main idea,
                 error-guarantees, advantages, and disadvantages. Our
                 experimental survey then compares the performance of all
                 twelve cardinality estimation algorithms. We evaluate
                 the algorithms' accuracy, runtime, and memory
                 consumption using synthetic and real-world datasets.
                 Our results show that different algorithms excel in
                 different categories, and we highlight their
                 trade-offs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Park:2017:SSL,
  author =       "Jong-Hyeok Park and Gihwan Oh and Sang-Won Lee",
  title =        "{SQL} statement logging for making {SQLite} truly
                 lite",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "4",
  pages =        "513--525",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164146",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Feb 15 16:29:05 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The lightweight codebase of SQLite was helpful in
                 making it become the de-facto standard database in most
                 mobile devices, but, at the same time, forced it to
                 take less-complicated transactional schemes, such as
                 physical page logging, journaling, and force commit,
                 which in turn cause excessive write amplification.
                 Thus, the write IO cost in SQLite is not lightweight at
                 all. In this paper, to make SQLite truly lite in terms
                 of IO efficiency for the transactional support, we
                 propose SQLite/SSL, a per-transaction SQL statement
                 logging scheme: when a transaction commits, SQLite/SSL
                 ensures its durability by storing only SQL statements
                 of small size, thus writing less and performing faster
                 at no compromise of transactional solidity. Our main
                 contribution is to show that, based on the observation
                 that mobile transactions tend to be short and exhibit
                 strong update locality, logical logging can, though
                 long discarded, become an elegant and perfect fit for
                 SQLite-based mobile applications. Further, we leverage
                 the WAL journal mode in vanilla SQLite as a
                 transaction-consistent checkpoint mechanism which is
                 indispensable in any logical logging scheme. In
                 addition, we show for the first time that
                 byte-addressable NVM (non-volatile memory) in host-side
                 can realize the full potential of logical logging
                 because it allows to store fine-grained logs quickly.
                 We have prototyped SQLite/SSL by augmenting vanilla
                 SQLite with a transaction-consistent checkpoint
                 mechanism and a redo-only recovery logic, and have
                 evaluated its performance using a set of synthetic and
                 real workloads. When a real NVM board is used as its
                 log device, SQLite/SSL can outperform vanilla SQLite's
                 WAL mode by up to 300x and also outperform the
                 state-of-the-art SQLite/PPL scheme by several folds in
                 terms of IO time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
  remark =       "The speedups come from employing nonvolatile memory
                 (which costs about 10 times as much as DRAM) for
                 database updates, and delaying writes to the
                 filesystem, which is important for SSD devices that
                 have limited write life. The target platform is mobile
                 devices. There is no mention of whether the extensions
                 to the public-domain SQLite3 code are available to
                 others.",
}

%%% PVLDB 11(5), pp. 526--539: practical differential privacy for SQL
%%% queries via elastic sensitivity and the FLEX system.
@Article{Johnson:2018:TPD,
  author =       "Noah Johnson and Joseph P. Near and Dawn Song",
  title =        "Towards practical differential privacy for {SQL}
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "526--539",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177733",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differential privacy promises to enable general data
                 analytics while protecting individual privacy, but
                 existing differential privacy mechanisms do not support
                 the wide variety of features and databases used in
                 real-world SQL-based analytics systems. This paper
                 presents the first practical approach for differential
                 privacy of SQL queries. Using 8.1 million real-world
                 queries, we conduct an empirical study to determine the
                 requirements for practical differential privacy, and
                 discuss limitations of previous approaches in light of
                 these requirements. To meet these requirements we
                 propose elastic sensitivity, a novel method for
                 approximating the local sensitivity of queries with
                 general equijoins. We prove that elastic sensitivity is
                 an upper bound on local sensitivity and can therefore
                 be used to enforce differential privacy using any local
                 sensitivity-based mechanism. We build FLEX, a practical
                 end-to-end system to enforce differential privacy for
                 SQL queries using elastic sensitivity. We demonstrate
                 that FLEX is compatible with any existing database, can
                 enforce differential privacy for real-world SQL
                 queries, and incurs negligible (0.03\%) performance
                 overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 540--552: Apple's CloudKit cloud storage backend.
%%% NOTE(review): title brace-group corrected from "{Cloudkit}" to
%%% "{CloudKit}" to match the system's name as used throughout the
%%% abstract; the braces protect this casing from style downcasing.
@Article{Shraer:2018:CSS,
  author =       "Alexander Shraer and Alexandre Aybes and Bryan Davis
                 and Christos Chrysafis and Dave Browning and Eric
                 Krugler and Eric Stone and Harrison Chandler and Jacob
                 Farkas and John Quinn and Jonathan Ruben and Michael
                 Ford and Mike McMahon and Nathan Williams and Nicolas
                 Favre-Felix and Nihar Sharma and Ori Herrnstadt and
                 Paul Seligman and Raghav Pisolkar and Scott Dugas and
                 Scott Gray and Sytze Harkema and Valentin Kravtsov and
                 Vanessa Hong and Wan Ling Yih and Yizuo Tian",
  title =        "{CloudKit}: structured storage for mobile
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "540--552",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164138",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "CloudKit is Apple's cloud backend service and
                 application development framework that provides
                 strongly-consistent storage for structured data and
                 makes it easy to synchronize data across user devices
                 or share it among multiple users. Launched more than 3
                 years ago, CloudKit forms the foundation for more than
                 50 Apple apps, including many of our most important and
                 popular applications such as Photos, iCloud Drive,
                 Notes, Keynote, and News, as well as many third-party
                 apps. To deliver this at large scale, CloudKit
                 explicitly leverages multi-tenancy at the application
                 level as well as at the user level to guide efficient
                 data placement and distribution. By using CloudKit
                 application developers are free to focus on delivering
                 the application front-end and logic while relying on
                 CloudKit for scale, consistency, durability and
                 security. CloudKit manages petabytes of data and
                 handles hundreds of millions of users around the world
                 on a daily basis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 553--565: BzTree, a latch-free B-tree for NVM.
%%% NOTE(review): title brace-group corrected from "{Bztree}" to
%%% "{BzTree}" to match the index's name as used throughout the
%%% abstract; the braces protect this casing from style downcasing.
@Article{Arulraj:2018:BHP,
  author =       "Joy Arulraj and Justin Levandoski and Umar Farooq
                 Minhas and Per-Ake Larson",
  title =        "{BzTree}: a high-performance latch-free range index
                 for non-volatile memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "553--565",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3164135.3164147",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Storing a database (rows and indexes) entirely in
                 non-volatile memory (NVM) potentially enables both high
                 performance and fast recovery. To fully exploit
                 parallelism on modern CPUs, modern main-memory
                 databases use latch-free (lock-free) index structures,
                 e.g. Bw-tree or skip lists. To achieve high performance
                 NVM-resident indexes also need to be latch-free. This
                 paper describes the design of the BzTree, a latch-free
                 B-tree index designed for NVM. The BzTree uses a
                 persistent multi-word compare-and-swap operation
                 (PMwCAS) as a core building block, enabling an index
                 design that has several important advantages compared
                 with competing index structures such as the Bw-tree.
                 First, the BzTree is latch-free yet simple to
                 implement. Second, the BzTree is fast --- showing up to
                 2x higher throughput than the Bw-tree in our
                 experiments. Third, the BzTree does not require any
                 special-purpose recovery code. Recovery is
                 near-instantaneous and only involves rolling back (or
                 forward) any PMwCAS operations that were in-flight
                 during failure. Our end-to-end recovery experiments of
                 BzTree report an average recovery time of 145 $ \mu $
                 s. Finally, the same BzTree implementation runs
                 seamlessly on both volatile RAM and NVM, which greatly
                 reduces the cost of code maintenance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 566--579: FlexPS, a parameter-server system with
%%% multi-stage abstraction for flexible parallelism control.
@Article{Huang:2018:FFP,
  author =       "Yuzhen Huang and Tatiana Jin and Yidi Wu and Zhenkun
                 Cai and Xiao Yan and Fan Yang and Jinfeng Li and Yuying
                 Guo and James Cheng",
  title =        "{FlexPS}: flexible parallelism control in parameter
                 server architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "566--579",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177734",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As a general abstraction for coordinating the
                 distributed storage and access of model parameters, the
                 parameter server (PS) architecture enables distributed
                 machine learning to handle large datasets and high
                 dimensional models. Many systems, such as Parameter
                 Server and Petuum, have been developed based on the PS
                 architecture and widely used in practice. However, none
                 of these systems supports changing parallelism during
                 runtime, which is crucial for the efficient execution
                 of machine learning tasks with dynamic workloads. We
                 propose a new system, called FlexPS, which introduces a
                 novel multi-stage abstraction to support flexible
                 parallelism control. With the multi-stage abstraction,
                 a machine learning task can be mapped to a series of
                 stages and the parallelism for a stage can be set
                 according to its workload. Optimizations such as stage
                 scheduler, stage-aware consistency controller, and
                 direct model transfer are proposed for the efficiency
                 of multi-stage machine learning in FlexPS. As a general
                 and complete PS systems, FlexPS also incorporates many
                 optimizations that are not limited to multi-stage
                 machine learning. We conduct extensive experiments
                 using a variety of machine learning workloads, showing
                 that FlexPS achieves significant speedups and resource
                 saving compared with the state-of-the-art PS systems
                 such as Petuum and Multiverso.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 580--593: Mitra, programming-by-example migration
%%% of hierarchical (XML/JSON) data to relational tables.
@Article{Yaghmazadeh:2018:AMH,
  author =       "Navid Yaghmazadeh and Xinyu Wang and Isil Dillig",
  title =        "Automated migration of hierarchical data to relational
                 tables using programming-by-example",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "580--593",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177735",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "While many applications export data in hierarchical
                 formats like XML and JSON, it is often necessary to
                 convert such hierarchical documents to a relational
                 representation. This paper presents a novel
                 programming-by-example approach, and its implementation
                 in a tool called Mitra, for automatically migrating
                 tree-structured documents to relational tables. We have
                 evaluated the proposed technique using two sets of
                 experiments. In the first experiment, we used Mitra to
                 automate 98 data transformation tasks collected from
                 StackOverflow. Our method can generate the desired
                 program for 94\% of these benchmarks with an average
                 synthesis time of 3.8 seconds. In the second
                 experiment, we used Mitra to generate programs that can
                 convert real-world XML and JSON datasets to
                 full-fledged relational databases. Our evaluation shows
                 that Mitra can automate the desired transformation for
                 all datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 594--606: TOAIN, a throughput-optimizing adaptive
%%% index for dynamic kNN queries on road networks.
%%% NOTE(review): removed the stray space in the title's "{$k$ NN}"
%%% (which typesets as "k NN"); the published title reads "kNN".
@Article{Luo:2018:TTO,
  author =       "Siqiang Luo and Ben Kao and Guoliang Li and Jiafeng Hu
                 and Reynold Cheng and Yudian Zheng",
  title =        "{TOAIN}: a throughput optimizing adaptive index for
                 answering dynamic {$k$NN} queries on road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "594--606",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177736",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the classical k NN queries on road networks.
                 Existing solutions mostly focus on reducing query
                 processing time. In many applications, however, system
                 throughput is a more important measure. We devise a
                 mathematical model that describes throughput in terms
                 of a number of system characteristics. We show that
                 query time is only one of the many parameters that
                 impact throughput. Others include update time and
                 query/update arrival rates. We show that the
                 traditional approach of improving query time alone is
                 generally inadequate in optimizing throughput.
                 Moreover, existing solutions lack flexibility in
                 adapting to environments of different characteristics.
                 We propose Toain, which is a very flexible algorithm
                 that can be easily trained to adapt to a given
                 environment for maximizing query throughput. We conduct
                 extensive experiments on both real and synthetic data
                 and show that Toain gives significantly higher
                 throughput compared with existing solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 607--620: ease.ml, multi-tenant model selection
%%% for shared machine-learning clusters.
%%% NOTE(review): normalized "4.1 x" to "4.1x" in the abstract for
%%% consistency with "9.8x" earlier in the same sentence (apparent
%%% text-extraction artifact).
@Article{Li:2018:EMT,
  author =       "Tian Li and Jie Zhong and Ji Liu and Wentao Wu and Ce
                 Zhang",
  title =        "{Ease.ml}: towards multi-tenant resource sharing for
                 machine learning workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "607--620",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177737",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present ease.ml, a declarative machine learning
                 service platform. With ease.ml, a user defines the
                 high-level schema of an ML application and submits the
                 task via a Web interface. The system then deals with
                 the rest, such as model selection and data movement.
                 The ultimate question we hope to understand is that, as
                 a ``service provider'' that manages a shared cluster of
                 machines running machine learning workloads, what is
                 the resource sharing strategy that maximizes the global
                 satisfaction of all our users? This paper does not
                 completely answer this general question, but focuses on
                 solving the first technical challenge we were facing
                 when trying to build ease.ml. We observe that resource
                 sharing is a critical yet subtle issue in this
                 multi-tenant scenario, as we have to balance between
                 efficiency and fairness. We first formalize the problem
                 that we call multi-tenant model selection, aiming for
                 minimizing the total regret of all users running
                 automatic model selection tasks. We then develop a
                 novel algorithm that combines multi-armed bandits with
                 Bayesian optimization and prove a regret bound under
                 the multi-tenant setting. Finally, we report our
                 evaluation of ease.ml on synthetic data and on two
                 services we are providing to our users, namely, image
                 classification with deep neural networks and binary
                 classification with Azure ML Studio. Our experimental
                 evaluation results show that our proposed solution can
                 be up to 9.8x faster in achieving the same global
                 average accuracy for all users as the two popular
                 heuristics used by our users before ease.ml, and 4.1x
                 faster than state-of-the-art systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 621--634: worst-case optimal R-tree packing with a
%%% parallel bulk-loading algorithm.
@Article{Qi:2018:TOE,
  author =       "Jianzhong Qi and Yufei Tao and Yanchuan Chang and Rui
                 Zhang",
  title =        "Theoretically optimal and empirically efficient
                 {R}-trees with strong parallelizability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "621--634",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177738",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The massive amount of data and large variety of data
                 distributions in the big data era call for access
                 methods that are efficient in both query processing and
                 index bulk-loading, and over both practical and
                 worst-case workloads. To address this need, we revisit
                 a classic multidimensional access method --- the
                 R-tree. We propose a novel R-tree packing strategy that
                 produces R-trees with an asymptotically optimal I/O
                 complexity for window queries in the worst case. Our
                 experiments show that the R-trees produced by the
                 proposed strategy are highly efficient on real and
                 synthetic data of different distributions. The proposed
                 strategy is also simple to parallelize, since it relies
                 only on sorting. We propose a parallel algorithm for
                 R-tree bulk-loading based on the proposed packing
                 strategy, and analyze its performance under the
                 massively parallel communication model. Experimental
                 results confirm the efficiency and scalability of the
                 parallel algorithm over large data sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 635--647: domain-aware multi-truth discovery from
%%% conflicting data sources via an integrated Bayesian approach.
@Article{Lin:2018:DAM,
  author =       "Xueling Lin and Lei Chen",
  title =        "Domain-aware multi-truth discovery from conflicting
                 sources",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "635--647",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177739",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the Big Data era, truth discovery has served as a
                 promising technique to solve conflicts in the facts
                 provided by numerous data sources. The most significant
                 challenge for this task is to estimate source
                 reliability and select the answers supported by high
                 quality sources. However, existing works assume that
                 one data source has the same reliability on any kinds
                 of entity, ignoring the possibility that a source may
                 vary in reliability on different domains. To capture
                 the influence of various levels of expertise in
                 different domains, we integrate domain expertise
                 knowledge to achieve a more precise estimation of
                 source reliability. We propose to infer the domain
                 expertise of a data source based on its data richness
                 in different domains. We also study the mutual
                 influence between domains, which will affect the
                 inference of domain expertise. Through leveraging the
                 unique features of the multi-truth problem that sources
                 may provide partially correct values of a data item, we
                 assign more reasonable confidence scores to value sets.
                 We propose an integrated Bayesian approach to
                 incorporate the domain expertise of data sources and
                 confidence scores of value sets, aiming to find
                 multiple possible truths without any supervision.
                 Experimental results on two real-world datasets
                 demonstrate the feasibility, efficiency and
                 effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(5), pp. 648--662: contention-aware lock scheduling (LDSF
%%% and bLDSF), adopted as the default scheduler in MySQL 8.0.3+.
%%% NOTE(review): added the missing terminal period to the abstract's
%%% last sentence, matching every other abstract in this file
%%% (apparent truncation artifact) --- verify against the paper.
@Article{Tian:2018:CAL,
  author =       "Boyu Tian and Jiamin Huang and Barzan Mozafari and
                 Grant Schoenebeck",
  title =        "Contention-aware lock scheduling for transactional
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "5",
  pages =        "648--662",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177732.3177740",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 17 07:25:04 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Lock managers are among the most studied components in
                 concurrency control and transactional systems. However,
                 one question seems to have been generally overlooked:
                 ``When there are multiple lock requests on the same
                 object, which one(s) should be granted first?'' Nearly
                 all existing systems rely on a FIFO (first in, first
                 out) strategy to decide which transaction(s) to grant
                 the lock to. In this paper, however, we show that the
                 lock scheduling choices have significant ramifications
                 on the overall performance of a transactional system.
                 Despite the large body of research on job scheduling
                 outside the database context, lock scheduling presents
                 subtle but challenging requirements that render
                 existing results on scheduling inapt for a
                 transactional database. By carefully studying this
                 problem, we present the concept of contention-aware
                 scheduling, show the hardness of the problem, and
                 propose novel lock scheduling algorithms (LDSF and
                 bLDSF), which guarantee a constant factor approximation
                 of the best scheduling. We conduct extensive
                 experiments using a popular database on both TPC-C and
                 a microbenchmark. Compared to FIFO---the default
                 scheduler in most database systems---our bLDSF
                 algorithm yields up to 300x speedup in overall
                 transaction latency. Alternatively, our LDSF algorithm,
                 which is simpler and achieves comparable performance to
                 bLDSF, has already been adopted by open-source
                 community, and was chosen as the default scheduling
                 strategy in MySQL 8.0.3+.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(6), pp. 663--676: Quickstep, a scale-up single-node
%%% in-memory data platform (Apache incubating).
%%% Note: starting with issue 6, DOIs in this file carry the VLDB
%%% Endowment prefix 10.14778 rather than 10.1145.
@Article{Patel:2018:QDP,
  author =       "Jignesh M. Patel and Harshad Deshmukh and Jianqiao Zhu
                 and Navneet Potti and Zuyu Zhang and Marc Spehlmann and
                 Hakan Memisoglu and Saket Saurabh",
  title =        "{Quickstep}: a data platform based on the scaling-up
                 approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "6",
  pages =        "663--676",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3184470.3184471",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 10 06:50:54 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern servers pack enough storage and computing power
                 that just a decade ago was spread across a modest-sized
                 cluster. This paper presents a prototype system, called
                 Quickstep, to exploit the large amount of parallelism
                 that is packed inside modern servers. Quickstep builds
                 on a vast body of previous methods for organizing data,
                 optimizing, scheduling and executing queries, and
                 brings them together in a single system. Quickstep also
                 includes new query processing methods that go beyond
                 previous approaches. To keep the project focused, the
                 project's initial target is read-mostly in-memory data
                 warehousing workloads in single-node settings. In this
                 paper, we describe the design and implementation of
                 Quickstep for this target application space. We also
                 present experimental results comparing the performance
                 of Quickstep to a number of other systems,
                 demonstrating that Quickstep is often faster than many
                 other contemporary systems, and in some cases faster by
                 orders-of-magnitude. Quickstep is an Apache
                 (incubating) project.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(6), pp. 677--690: Coconut, sortable data-series
%%% summarizations enabling bottom-up bulk-loaded indexes.
@Article{Kondylakis:2018:CSB,
  author =       "Haridimos Kondylakis and Niv Dayan and Kostas
                 Zoumpatianos and Themis Palpanas",
  title =        "{Coconut}: a scalable bottom-up approach for building
                 data series indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "6",
  pages =        "677--690",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3184470.3184472",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 10 06:50:54 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many modern applications produce massive amounts of
                 data series that need to be analyzed, requiring
                 efficient similarity search operations. However, the
                 state-of-the-art data series indexes that are used for
                 this purpose do not scale well for massive datasets in
                 terms of performance, or storage costs. We pinpoint the
                 problem to the fact that existing summarizations of
                 data series used for indexing cannot be sorted while
                 keeping similar data series close to each other in the
                 sorted order. This leads to two design problems. First,
                 traditional bulk-loading algorithms based on sorting
                 cannot be used. Instead, index construction takes place
                 through slow top-down insertions, which create a
                 non-contiguous index that results in many random I/Os.
                 Second, data series cannot be sorted and split across
                 nodes evenly based on their median value; thus, most
                 leaf nodes are in practice nearly empty. This further
                 slows down query speed and amplifies storage costs. To
                 address these problems, we present Coconut. The first
                 innovation in Coconut is an inverted, sortable data
                 series summarization that organizes data series based
                 on a z-order curve, keeping similar series close to
                 each other in the sorted order. As a result, Coconut is
                 able to use bulk-loading techniques that rely on
                 sorting to quickly build a contiguous index using large
                 sequential disk I/Os. We then explore prefix-based and
                 median-based splitting policies for bottom-up
                 bulk-loading, showing that median-based splitting
                 outperforms the state of the art, ensuring that all
                 nodes are densely populated. Overall, we show
                 analytically and empirically that Coconut dominates the
                 state-of-the-art data series indexes in terms of
                 construction speed, query speed, and storage costs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 11(6), pp. 691--704: distributed worst-case optimal
%%% low-memory dataflows for subgraph query evaluation.
@Article{Ammar:2018:DES,
  author =       "Khaled Ammar and Frank McSherry and Semih Salihoglu
                 and Manas Joglekar",
  title =        "Distributed evaluation of subgraph queries using
                 worst-case optimal low-memory dataflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "6",
  pages =        "691--704",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3184470.3184473",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 10 06:50:54 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of finding and monitoring
                 fixed-size subgraphs in a continually changing
                 large-scale graph. We present the first approach that
                 (i) performs worst-case optimal computation and
                 communication, (ii) maintains a total memory footprint
                 linear in the number of input edges, and (iii) scales
                 down per-worker computation, communication, and memory
                 requirements linearly as the number of workers
                 increases, even on adversarially skewed inputs. Our
                 approach is based on worst-case optimal join
                 algorithms, recast as a data-parallel dataflow
                 computation. We describe the general algorithm and
                 modifications that make it robust to skewed data, prove
                 theoretical bounds on its resource requirements in the
                 massively parallel computing model, and implement and
                 evaluate it on graphs containing as many as 64 billion
                 edges. The underlying algorithm and ideas generalize
                 from finding and monitoring subgraphs to the more
                 general problem of computing and maintaining relational
                 equi-joins over dynamic relations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Li:2018:MFC,
  author =       {Teng Li and Zhiyuan Xu and Jian Tang and Yanzhi Wang},
  title =        {Model-free control for distributed stream data
                  processing using deep reinforcement learning},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {6},
  pages =        {705--718},
  month =        feb,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3184470.3184474},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue Apr 10 06:50:54 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {In this paper, we focus on general-purpose Distributed
                  Stream Data Processing Systems (DSDPSs), which deal
                  with processing of unbounded streams of continuous data
                  at scale distributedly in real or near-real time. A
                  fundamental problem in a DSDPS is the scheduling
                  problem (i.e., assigning workload to workers/machines)
                  with the objective of minimizing average end-to-end
                  tuple processing time. A widely-used solution is to
                  distribute workload evenly over machines in the cluster
                  in a round-robin manner, which is obviously not
                  efficient due to lack of consideration for
                  communication delay. Model-based approaches (such as
                  queueing theory) do not work well either due to the
                  high complexity of the system environment. We aim to
                  develop a novel model-free approach that can learn to
                  well control a DSDPS from its experience rather than
                  accurate and mathematically solvable system models,
                  just as a human learns a skill (such as cooking,
                  driving, swimming, etc). Specifically, we, for the
                  first time, propose to leverage emerging Deep
                  Reinforcement Learning (DRL) for enabling model-free
                  control in DSDPSs; and present design, implementation
                  and evaluation of a novel and highly effective
                  DRL-based control framework, which minimizes average
                  end-to-end tuple processing time by jointly learning
                  the system environment via collecting very limited
                  runtime statistics data and making decisions under the
                  guidance of powerful Deep Neural Networks (DNNs). To
                  validate and evaluate the proposed framework, we
                  implemented it based on a widely-used DSDPS, Apache
                  Storm, and tested it with three representative
                  applications: continuous queries, log stream processing
                  and word count (stream version). Extensive experimental
                  results show (1) Compared to Storm's default scheduler
                  and the state-of-the-art model-based method, the
                  proposed framework reduces average tuple processing by
                  33.5\% and 14.0\% respectively on average. (2) The
                  proposed framework can quickly reach a good scheduling
                  solution during online learning, which justifies its
                  practicability for online control in DSDPSs.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Psallidas:2018:SFG,
  author =       "Fotis Psallidas and Eugene Wu",
  title =        "{Smoke}: fine-grained lineage at interactive speed",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "6",
  pages =        "719--732",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3184470.3184475",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 10 06:50:54 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data lineage describes the relationship between
                  individual input and output data items of a workflow
                  and is an integral ingredient for both traditional
                  (e.g., debugging or auditing) and emergent (e.g.,
                  explanations or cleaning) applications. The core,
                  long-standing problem that lineage systems need to
                  address---and the main focus of this paper---is to
                  quickly capture lineage across a workflow in order to
                  speed up future queries over lineage. Current lineage
                  systems, however, either incur high lineage capture
                  overheads, high lineage query processing costs, or
                  both. In response, developers resort to manual
                  implementations of applications that, in principle, can
                  be expressed and optimized in lineage terms. This paper
                  describes Smoke, an in-memory database engine that
                  provides both fast lineage capture and lineage query
                  processing. To do so, Smoke tightly integrates the
                  lineage capture logic into physical database operators;
                  stores lineage in efficient lineage representations;
                  and employs optimizations if future lineage queries are
                  known up-front. Our experiments on microbenchmarks and
                  realistic workloads show that Smoke reduces the lineage
                  capture overhead and lineage query costs by multiple
                  orders of magnitude as compared to state-of-the-art
                  alternatives. On real-world applications, we show that
                  Smoke meets the latency requirements of interactive
                  visualizations (e.g., $<$ 150ms) and outperforms
                  hand-written implementations of data profiling
                  primitives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Idris:2018:CQI,
  author =       {Muhammad Idris and Mart{\'\i}n Ugarte and Stijn
                  Vansummeren and Hannes Voigt and Wolfgang Lehner},
  title =        {Conjunctive queries with inequalities under updates},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {733--745},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192966},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Modern application domains such as Composite Event
                  Recognition (CER) and real-time Analytics require the
                  ability to dynamically refresh query results under high
                  update rates. Traditional approaches to this problem
                  are based either on the materialization of subresults
                  (to avoid their recomputation) or on the recomputation
                  of subresults (to avoid the space overhead of
                  materialization). Both techniques have recently been
                  shown suboptimal: instead of materializing results and
                  subresults, one can maintain a data structure that
                  supports efficient maintenance under updates and can
                  quickly enumerate the full query output, as well as the
                  changes produced under single updates. Unfortunately,
                  these data structures have been developed only for
                  aggregate-join queries composed of equi-joins, limiting
                  their applicability in domains such as CER where
                  temporal joins are commonplace. In this paper, we
                  present a new approach for dynamically evaluating
                  queries with multi-way $ \theta $-joins under updates
                  that is effective in avoiding both materialization and
                  recomputation of results, while supporting a wide range
                  of applications. To do this we generalize Dynamic
                  Yannakakis, an algorithm for dynamically processing
                  acyclic equi-join queries. In tandem, and of
                  independent interest, we generalize the notions of
                  acyclicity and free-connexity to arbitrary $ \theta
                  $-joins. We instantiate our framework to the case where
                  $ \theta $-joins are only composed of equalities and
                  inequalities ($<$, $ \leq $, $=$, $>$, $ \geq $) and
                  experimentally compare this algorithm, called IEDyn, to
                  state of the art CER systems as well as incremental
                  view maintenance engines. IEDyn performs consistently
                  better than the competitor systems with up to two
                  orders of magnitude improvements in both time and
                  memory consumption.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Yint:2018:BER,
  author =       {Zhicheng Yint and Jin Sun and Ming Li and Jaliya
                  Ekanayake and Haibo Lin and Marc Friedman and Jos{\'e}
                  A. Blakeley and Clemens Szyperski and Nikhil R.
                  Devanur},
  title =        {Bubble execution: resource-aware reliable analytics at
                  cloud scale},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {746--758},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192967},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Enabling interactive data exploration at cloud scale
                  requires minimizing end-to-end query execution latency,
                  while guaranteeing fault tolerance, and query execution
                  under resource-constraints. Typically, such a query
                  execution involves orchestrating the execution of
                  hundreds or thousands of related tasks on cloud scale
                  clusters. Without any resource constraints, all query
                  tasks can be scheduled to execute simultaneously (gang
                  scheduling) while connected tasks stream data between
                  them. When the data size referenced by a query
                  increases, gang scheduling may be resource-wasteful or
                  un-satisfiable with a limited, per-query resource
                  budget. This paper introduces Bubble Execution, a new
                  query processing framework for interactive workloads at
                  cloud scale, that balances cost-based query
                  optimization, fault tolerance, optimal resource
                  management, and execution orchestration. Bubble
                  execution involves dividing a query execution graph
                  into a collection of query sub-graphs (bubbles), and
                  scheduling them within a per-query resource budget. The
                  query operators (tasks) inside a bubble stream data
                  between them while fault tolerance is handled by
                  persisting temporary results at bubble boundaries. Our
                  implementation enhances our JetScope service, for
                  interactive workloads, deployed in production clusters
                  at Microsoft. Experiments with TPC-H queries show that
                  bubble execution can reduce resource usage
                  significantly in the presence of failures while
                  maintaining performance competitive with gang
                  execution.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Kruse:2018:EDA,
  author =       {Sebastian Kruse and Felix Naumann},
  title =        {Efficient discovery of approximate dependencies},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {759--772},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192968},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Functional dependencies (FDs) and unique column
                  combinations (UCCs) form a valuable ingredient for many
                  data management tasks, such as data cleaning, schema
                  recovery, and query optimization. Because these
                  dependencies are unknown in most scenarios, their
                  automatic discovery has been well researched. However,
                  existing methods mostly discover only exact
                  dependencies, i.e., those without violations.
                  Real-world dependencies, in contrast, are frequently
                  approximate due to data exceptions, ambiguities, or
                  data errors. This relaxation to approximate
                  dependencies renders their discovery an even harder
                  task than the already challenging exact dependency
                  discovery. To this end, we propose the novel and highly
                  efficient algorithm Pyro to discover both approximate
                  FDs and approximate UCCs. Pyro combines a
                  separate-and-conquer search strategy with
                  sampling-based guidance that quickly detects dependency
                  candidates and verifies them. In our broad experimental
                  evaluation, Pyro outperforms existing discovery
                  algorithms by a factor of up to 33, scales to larger
                  datasets, and at the same time requires the least main
                  memory.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Wang:2018:RID,
  author =       {Yue Wang and Alexandra Meliou and Gerome Miklau},
  title =        {{RC-index}: diversifying answers to range queries},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {773--786},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192969},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Query result diversification is widely used in data
                  exploration, Web search, and recommendation systems.
                  The problem of returning diversified query results
                  consists of finding a small subset of valid query
                  answers that are representative and different from one
                  another, usually quantified by a diversity score. Most
                  existing techniques for query diversification first
                  compute all valid query results and then find a diverse
                  subset. These techniques are inefficient when the set
                  of valid query results is large. Other work has
                  proposed efficient solutions for restricted application
                  settings, where results are shared across multiple
                  queries. In this paper, our goal is to support result
                  diversification for general range queries over a single
                  relation. We propose the RC-Index, a novel index
                  structure that achieves efficiency by reducing the
                  number of items that must be retrieved by the database
                  to form a diverse set of the desired size (about 1
                  second for a dataset of 1 million items). Further, we
                  prove that an RC-Index offers strong approximation
                  guarantees. To the best of our knowledge, this is the
                  first index-based diversification method with a
                  guaranteed approximation ratio for range queries.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Ding:2018:UUP,
  author =       {Xin Ding and Lu Chen and Yunjun Gao and Christian S.
                  Jensen and Hujun Bao},
  title =        {{UlTraMan}: a unified platform for big trajectory data
                  management and analytics},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {787--799},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192970},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Massive trajectory data is being generated by
                  GPS-equipped devices, such as cars and mobile phones,
                  which is used increasingly in transportation,
                  location-based services, and urban computing. As a
                  result, a variety of methods have been proposed for
                  trajectory data management and analytics. However,
                  traditional systems and methods are usually designed
                  for very specific data management or analytics needs,
                  which forces users to stitch together heterogeneous
                  systems to analyze trajectory data in an inefficient
                  manner. Targeting the overall data pipeline of big
                  trajectory data management and analytics, we present a
                  unified platform, termed as UlTraMan. In order to
                  achieve scalability, efficiency, persistence, and
                  flexibility, (i) we extend Apache Spark with respect to
                  both data storage and computing by seamlessly
                  integrating a key--value store, and (ii) we enhance the
                  MapReduce paradigm to allow flexible optimizations
                  based on random data access. We study the resulting
                  system's flexibility using case studies on data
                  retrieval, aggregation analyses, and pattern mining.
                  Extensive experiments on real and synthetic trajectory
                  data are reported to offer insight into the scalability
                  and performance of UlTraMan.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Jindal:2018:SSM,
  author =       {Alekh Jindal and Konstantinos Karanasos and Sriram Rao
                  and Hiren Patel},
  title =        {Selecting subexpressions to materialize at datacenter
                  scale},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {800--812},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192971},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We observe significant overlaps in the computations
                  performed by user jobs in modern shared analytics
                  clusters. Na{\"\i}vely computing the same
                  subexpressions multiple times results in wasting
                  cluster resources and longer execution times. Given
                  that these shared cluster workloads consist of tens of
                  thousands of jobs, identifying overlapping computations
                  across jobs is of great interest to both cluster
                  operators and users. Nevertheless, existing approaches
                  support orders of magnitude smaller workloads or employ
                  heuristics with limited effectiveness. In this paper,
                  we focus on the problem of subexpression selection for
                  large workloads, i.e., selecting common parts of job
                  plans and materializing them to speed-up the evaluation
                  of subsequent jobs. We provide an ILP-based formulation
                  of our problem and map it to a bipartite graph labeling
                  problem. Then, we introduce BigSubs, a vertex-centric
                  graph algorithm to iteratively choose in parallel which
                  subexpressions to materialize and which subexpressions
                  to use for evaluating each job. We provide a
                  distributed implementation of our approach using our
                  internal SQL-like execution framework, SCOPE, and
                  assess its effectiveness over production workloads.
                  BigSubs supports workloads with tens of thousands of
                  jobs, yielding savings of up to 40\% in machine-hours.
                  We are currently integrating our techniques with the
                  SCOPE runtime in our production clusters.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Nargesian:2018:TUS,
  author =       {Fatemeh Nargesian and Erkang Zhu and Ken Q. Pu and
                  Ren{\'e}e J. Miller},
  title =        {Table union search on open data},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {813--825},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192973},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {We define the table union search problem and present a
                  probabilistic solution for finding tables that are
                  unionable with a query table within massive
                  repositories. Two tables are unionable if they share
                  attributes from the same domain. Our solution
                  formalizes three statistical models that describe how
                  unionable attributes are generated from set domains,
                  semantic domains with values from an ontology, and
                  natural language domains. We propose a data-driven
                  approach that automatically determines the best model
                  to use for each pair of attributes. Through a
                  distribution-aware algorithm, we are able to find the
                  optimal number of attributes in two tables that can be
                  unioned. To evaluate accuracy, we created and
                  open-sourced a benchmark of Open Data tables. We show
                  that our table union search outperforms in speed and
                  accuracy existing algorithms for finding related tables
                  and scales to provide efficient search over Open Data
                  repositories containing more than one million
                  attributes.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Chen:2018:STH,
  author =       {Jianfei Chen and Jun Zhu and Jie Lu and Shixia Liu},
  title =        {Scalable training of hierarchical topic models},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {7},
  pages =        {826--839},
  month =        mar,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3192965.3192972},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {Large-scale topic models serve as basic tools for
                  feature extraction and dimensionality reduction in many
                  practical applications. As a natural extension of flat
                  topic models, hierarchical topic models (HTMs) are able
                  to learn topics of different levels of abstraction,
                  which lead to deeper understanding and better
                  generalization than their flat counterparts. However,
                  existing scalable systems for flat topic models cannot
                  handle HTMs, due to their complicated data structures
                  such as trees and concurrent dynamically growing
                  matrices, as well as their susceptibility to local
                  optima. In this paper, we study the hierarchical latent
                  Dirichlet allocation (hLDA) model which is a powerful
                  nonparametric Bayesian HTM. We propose an efficient
                  partially collapsed Gibbs sampling algorithm for hLDA,
                  as well as an initialization strategy to deal with
                  local optima introduced by tree-structured models. We
                  also identify new system challenges in building
                  scalable systems for HTMs, and propose efficient data
                  layout for vectorizing HTM as well as distributed data
                  structures including dynamic matrices and trees.
                  Empirical studies show that our system is 87 times more
                  efficient than the previous open-source implementation
                  for hLDA, and can scale to thousands of CPU cores. We
                  demonstrate our scalability on a 131-million-document
                  corpus with 28 billion tokens, which is 4--5 orders of
                  magnitude larger than previously used corpus. Our
                  distributed implementation can extract 1,722 topics
                  from the corpus with 50 machines in just 7 hours.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Coskun:2018:IFN,
  author =       "Mustafa Coskun and Ananth Grama and Mehmet
                  Koyut{\"u}rk",
  title =        "Indexed fast network proximity querying",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "8",
  pages =        "840--852",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3204028.3204029",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 29 08:31:56 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Node proximity queries are among the most common
                  operations on network databases. A common measure of
                  node proximity is random walk based proximity, which
                  has been shown to be less susceptible to noise and
                  missing data. Real-time processing of random-walk based
                  proximity queries poses significant computational
                  challenges for larger graphs with over billions of
                  nodes and edges, since it involves solution of large
                  linear systems of equations. Due to the importance of
                  this operation, significant effort has been devoted to
                  developing efficient methods for random-walk based node
                  proximity computations. These methods either aim to
                  speed up iterative computations by exploiting numerical
                  properties of random walks, or rely on computation and
                  storage of matrix inverses to avoid computation during
                  query processing. Although both approaches have been
                  well studied, the speedup achieved by iterative
                  approaches does not translate to real-time query
                  processing, and the storage requirements of
                  inversion-based approaches prohibit their use on very
                  large graph databases. We present a novel approach to
                  significantly reducing the computational cost of random
                  walk based node proximity queries with scalable
                  indexing. Our approach combines domain
                  graph-partitioning based indexing with fast iterative
                  computations during query processing using Chebyshev
                  polynomials over the complex elliptic plane. This
                  approach combines the query processing benefits of
                  inversion techniques with the memory and storage
                  benefits of iterative approaches. Using real-world
                  networks with billions of nodes and edges, and top- k
                  proximity queries as the benchmark problem, we show
                  that our algorithm, I-Chopper, significantly
                  outperforms existing methods. Specifically, it
                  drastically reduces convergence time of the iterative
                  procedure, while also reducing storage requirements for
                  indexing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Zheng:2018:ODP,
  author =       {Libin Zheng and Lei Chen and Jieping Ye},
  title =        {Order dispatch in price-aware ridesharing},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {11},
  number =       {8},
  pages =        {853--865},
  month =        apr,
  year =         {2018},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3204028.3204030},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Tue May 29 08:31:56 MDT 2018},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract =     {With the prevalence of car-hailing applications,
                  ridesharing becomes more and more popular because of
                  its great potential in monetary saving and
                  environmental protection. Order dispatch is the key
                  problem in ridesharing, which has a strong impact on
                  riders' experience and platform's performance. Existing
                  order dispatch research works fail to consider the
                  price of the orders, which can be an important
                  reference because it directly relates to the platform's
                  profit. Our work takes the order price into concern,
                  and formulates a constrained optimization problem,
                  which takes platform's profit as the optimization
                  objective and performs controls on riders' detour
                  distance and waiting time. We prove the problem is
                  NP-hard, thus, we propose approximation methods. We
                  further develop a simulation framework based on real
                  ridesharing order and vehicle data. We conduct
                  experiments with this simulation framework to evaluate
                  the effectiveness and efficiency of the proposed
                  methods.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Mouratidis:2018:EPU,
  author =       "Kyriakos Mouratidis and Bo Tang",
  title =        "Exact processing of uncertain top-$k$ queries in
                 multi-criteria settings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "8",
  pages =        "866--879",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3204028.3204031",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 29 08:31:56 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional rank-aware processing assumes a dataset
                 that contains available options to cover a specific
                 need (e.g., restaurants, hotels, etc) and users who
                  browse that dataset via top-k queries with linear
                 scoring functions, i.e., by ranking the options
                 according to the weighted sum of their attributes, for
                 a set of given weights. In practice, however, user
                 preferences (weights) may only be estimated with
                 bounded accuracy, or may be inherently uncertain due to
                 the inability of a human user to specify exact weight
                 values with absolute accuracy. Motivated by this, we
                  introduce the uncertain top-k query (UTK). Given
                 uncertain preferences, that is, an approximate
                 description of the weight values, the UTK query reports
                  all options that may belong to the top-k set. A second
                 version of the problem additionally reports the exact
                  top-k set for each of the possible weight settings. We
                 develop a scalable processing framework for both UTK
                 versions, and demonstrate its efficiency using standard
                 benchmark datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Berti-Equille:2018:DGF,
  author =       "Laure Berti-{\'E}quille and Hazar Harmouch and Felix
                 Naumann and No{\"e}l Novelli and Saravanan
                 Thirumuruganathan",
  title =        "Discovery of genuine functional dependencies from
                 relational data with missing values",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "8",
  pages =        "880--892",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3204028.3204032",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 29 08:31:56 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Functional dependencies (FDs) play an important role
                 in maintaining data quality. They can be used to
                 enforce data consistency and to guide repairs over a
                 database. In this work, we investigate the problem of
                 missing values and its impact on FD discovery. When
                 using existing FD discovery algorithms, some genuine
                 FDs could not be detected precisely due to missing
                 values or some non-genuine FDs can be discovered even
                 though they are caused by missing values with a certain
                 NULL semantics. We define a notion of genuineness and
                 propose algorithms to compute the genuineness score of
                 a discovered FD. This can be used to identify the
                 genuine FDs among the set of all valid dependencies
                 that hold on the data. We evaluate the quality of our
                 method over various real-world and semi-synthetic
                 datasets with extensive experiments. The results show
                 that our method performs well for relatively large FD
                 sets and is able to accurately capture genuine FDs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cai:2018:ETD,
  author =       "Qingchao Cai and Zhongle Xie and Meihui Zhang and Gang
                 Chen and H. V. Jagadish and Beng Chin Ooi",
  title =        "Effective temporal dependence discovery in time series
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "8",
  pages =        "893--905",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3204028.3204033",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 29 08:31:56 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To analyze user behavior over time, it is useful to
                 group users into cohorts, giving rise to cohort
                 analysis. We identify several crucial limitations of
                 current cohort analysis, motivated by the unmet need
                 for temporal dependence discovery. To address these
                 limitations, we propose a generalization that we call
                 recurrent cohort analysis. We introduce a set of
                 operators for recurrent cohort analysis and design
                 access methods specific to these operators in both
                 single-node and distributed environments. Through
                 extensive experiments, we show that recurrent cohort
                 analysis when implemented using the proposed access
                 methods is up to six orders faster than one implemented
                 as a layer on top of a database in a single-node
                 setting, and two orders faster than one implemented
                 using Spark SQL in a distributed setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Arora:2018:HIP,
  author =       "Akhil Arora and Sakshi Sinha and Piyush Kumar and
                 Arnab Bhattacharya",
  title =        "{HD-index}: pushing the scalability-accuracy boundary
                 for approximate {kNN} search in high-dimensional
                 spaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "8",
  pages =        "906--919",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3204028.3204034",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 29 08:31:56 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nearest neighbor searching of large databases in
                 high-dimensional spaces is inherently difficult due to
                 the curse of dimensionality. A flavor of approximation
                 is, therefore, necessary to practically solve the
                 problem of nearest neighbor search. In this paper, we
                 propose a novel yet simple indexing scheme, HD-Index,
                 to solve the problem of approximate k-nearest neighbor
                 queries in massive high-dimensional databases. HD-Index
                 consists of a set of novel hierarchical structures
                 called RDB-trees built on Hilbert keys of database
                 objects. The leaves of the RDB-trees store distances of
                 database objects to reference objects, thereby allowing
                 efficient pruning using distance filters. In addition
                 to triangular inequality, we also use Ptolemaic
                 inequality to produce better lower bounds. Experiments
                 on massive (up to billion scale) high-dimensional (up
                 to 1000+) datasets show that HD-Index is effective,
                 efficient, and scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ahmad:2018:LSL,
  author =       "Yousuf Ahmad and Omar Khattab and Arsal Malik and
                 Ahmad Musleh and Mohammad Hammoud and Mucahid Kutlu and
                 Mostafa Shehata and Tamer Elsayed",
  title =        "{LA3}: a scalable link- and locality-aware linear
                 algebra-based graph analytics system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "8",
  pages =        "920--933",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3204028.3204035",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 29 08:31:56 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents LA3, a scalable distributed system
                 for graph analytics. LA3 couples a vertex-based
                 programming model with a highly optimized linear
                 algebra-based engine. It translates any vertex-centric
                 program into an iteratively executed sparse
                 matrix-vector multiplication (SpMV). To reduce
                 communication and enhance scalability, the adjacency
                 matrix representing an input graph is partitioned into
                 locality-aware 2D tiles distributed across multiple
                 processes. Alongside, three major optimizations are
                 incorporated to preclude redundant computations and
                 minimize communication. First, the link-based structure
                 of the input graph is exploited to classify vertices
                 into different types. Afterwards, vertices of special
                 types are factored out of the main loop of the graph
                 application to avoid superfluous computations. We refer
                 to this novel optimization as computation filtering.
                 Second, a communication filtering mechanism is involved
                 to optimize for the high sparsity of the input matrix
                 due to power-law distributions, common in real-world
                 graphs. This optimization ensures that each process
                 receives only the messages that pertain to non-zero
                 entries in its tiles, substantially reducing
                 communication traffic since most tiles are highly
                 sparse. Lastly, a pseudo-asynchronous computation and
                 communication optimization is proposed, whereby
                 processes progress and communicate asynchronously,
                 consume messages as soon as they become available, and
                 block otherwise. We implemented and extensively tested
                 LA3 on private and public clouds. Results show that LA3
                 outperforms six related state-of-the-art and popular
                 distributed graph analytics systems by an average of
                 10X.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2018:TSE,
  author =       "Dongxiang Zhang and Mengting Ding and Dingyu Yang and
                 Yi Liu and Ju Fan and Heng Tao Shen",
  title =        "Trajectory simplification: an experimental study and
                 quality analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "934--946",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213885",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ubiquitousness of GPS sensors in smart-phones,
                 vehicles and wearable devices has enabled the
                 collection of massive volumes of trajectory data from
                 tracing moving objects. Consequently, an unprecedented
                 scale of timestamped GPS data has been generated and
                 posed an urgent demand for an effective storage
                 mechanism for trajectory databases. The mainstream
                 compression technique is called trajectory
                 simplification, that finds a subsequence to approximate
                 the original trajectory and attempts to minimize the
                 information loss under a distance measure. Even though
                 various simplification algorithms have been proposed in
                 the past decades, there still lacks a thorough
                 comparison to cover all the state-of-the-art algorithms
                 and evaluate their quality using datasets in
                 diversified motion patterns. Hence, it still remains a
                 challenge for GPS data collectors to determine a proper
                 algorithm in a concrete application. In addition,
                 almost the entire line of previous methods uses
                 error-based metrics to evaluate the compression
                 quality, while ignoring their usability in supporting
                 spatio-temporal queries on top of the reduced database.
                 To bridge these gaps, we conduct so far the most
                 comprehensive evaluation on trajectory simplification
                 techniques. We compare the performance of 25 algorithms
                 in total using five real datasets in different motion
                 patterns. According to the experimental findings, we
                 present useful guidance for the selection or
                 development of effective trajectory simplification
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Antenucci:2018:CBE,
  author =       "Dolan Antenucci and Michael Cafarella",
  title =        "Constraint-based explanation and repair of
                 filter-based transformations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "947--960",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213886",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data analysts often need to transform an existing
                 dataset, such as with filtering, into a new dataset for
                 downstream analysis. Even the most trivial of mistakes
                 in this phase can introduce bias and lead to the
                 formation of invalid conclusions. For example, consider
                 a researcher identifying subjects for trials of a new
                 statin drug. She might identify patients with a high
                 dietary cholesterol intake as a population likely to
                 benefit from the drug, however, selection of these
                 individuals could bias the test population to those
                 with a generally unhealthy lifestyle, thereby
                 compromising the analysis. Reducing the potential for
                 bias in the dataset transformation process can minimize
                 the need to later engage in the tedious, time-consuming
                 process of trying to eliminate bias while preserving
                 the target dataset. We propose a novel interaction
                 model for explain-and-repair data transformation
                  systems, in which users interactively define
                 constraints for transformation code and the resultant
                 data. The system satisfies these constraints as far as
                 possible, and provides an explanation for any problems
                 encountered. We present an algorithm that yields
                 filter-based transformation code satisfying user
                 constraints. We implemented and evaluated a prototype
                  of this architecture, Emeril, using both synthetic and
                 real-world datasets. Our approach finds solutions 34\%
                 more often and 77\% more quickly than the previous
                 state-of-the-art solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2018:SSQ,
  author =       "Xiaolan Wang and Aaron Feng and Behzad Golshan and
                 Alon Halevy and George Mihaila and Hidekazu Oiwa and
                 Wang-Chiew Tan",
  title =        "Scalable semantic querying of text",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "961--974",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213887",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present the Koko system that takes declarative
                 information extraction to a new level by incorporating
                 advances in natural language processing techniques in
                  its extraction language. Koko is novel in that its
                 extraction language simultaneously supports conditions
                 on the surface of the text and on the structure of the
                 dependency parse tree of sentences, thereby allowing
                 for more refined extractions. Koko also supports
                 conditions that are forgiving to linguistic variation
                 of expressing concepts and allows to aggregate evidence
                 from the entire document in order to filter
                  extractions. To scale up, Koko exploits a
                 multi-indexing scheme and heuristics for efficient
                 extractions. We extensively evaluate Koko over publicly
                 available text corpora. We show that Koko indices take
                 up the smallest amount of space, are notably faster and
                 more effective than a number of prior indexing schemes.
                 Finally, we demonstrate Koko's scalability on a corpus
                 of 5 million Wikipedia articles.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bellomarini:2018:VSD,
  author =       "Luigi Bellomarini and Emanuel Sallinger and Georg
                 Gottlob",
  title =        "The {Vadalog} system: datalog-based reasoning for
                 knowledge graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "975--987",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213888",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Over the past years, there has been a resurgence of
                 Datalog-based systems in the database community as well
                 as in industry. In this context, it has been recognized
                 that to handle the complex knowledge-based scenarios
                 encountered today, such as reasoning over large
                 knowledge graphs, Datalog has to be extended with
                 features such as existential quantification. Yet,
                 Datalog-based reasoning in the presence of existential
                 quantification is in general undecidable. Many efforts
                 have been made to define decidable fragments. Warded
                 Datalog+/- is a very promising one, as it captures
                 PTIME complexity while allowing ontological reasoning.
                 Yet so far, no implementation of Warded Datalog+/- was
                 available. In this paper we present the Vadalog system,
                 a Datalog-based system for performing complex logic
                 reasoning tasks, such as those required in advanced
                 knowledge graphs. The Vadalog system is Oxford's
                 contribution to the VADA research programme, a joint
                 effort of the universities of Oxford, Manchester and
                 Edinburgh and around 20 industrial partners. As the
                 main contribution of this paper, we illustrate the
                 first implementation of Warded Datalog+/-, a
                 high-performance Datalog+/- system utilizing an
                 aggressive termination control strategy. We also
                 provide a comprehensive experimental evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Medya:2018:NND,
  author =       "Sourav Medya and Sayan Ranu and Jithin Vachery and
                 Ambuj Singh",
  title =        "Noticeable network delay minimization via node
                 upgrades",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "988--1001",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213889",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In several domains, the flow of data is governed by an
                 underlying network. Reduction of delays in end-to-end
                 data flow is an important network optimization task.
                 Reduced delays enable shorter travel times for vehicles
                 in road networks, faster information flow in social
                 networks, and increased rate of packets in
                 communication networks. While techniques for network
                 delay minimization have been proposed, they fail to
                 provide any noticeable reduction in individual data
                 flows. Furthermore, they treat all nodes as equally
                 important, which is often not the case in real-world
                 networks. In this paper, we incorporate these practical
                 aspects and propose a network design problem where the
                 goal is to perform k network upgrades such that it
                 maximizes the number of flows in the network with a
                 noticeable reduction in delay. We show that the problem
                 is NP-hard, APX-hard, and non-submodular. We overcome
                 these computational challenges by designing an
                 importance sampling based algorithm with provable
                 quality guarantees. Through extensive experiments on
                 real and synthetic data sets, we establish that
                 importance sampling imparts up to 1000 times speed-up
                 over the greedy approach, and provides up to 70 times
                 the improvement achieved by the state-of-the-art
                 technique.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Palkar:2018:EEE,
  author =       "Shoumik Palkar and James Thomas and Deepak Narayanan
                 and Pratiksha Thaker and Rahul Palamuttam and Parimajan
                 Negi and Anil Shanbhag and Malte Schwarzkopf and Holger
                 Pirk and Saman Amarasinghe and Samuel Madden and Matei
                 Zaharia",
  title =        "Evaluating end-to-end optimization for data analytics
                 applications in {Weld}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "1002--1015",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213890",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern analytics applications use a diverse mix of
                 libraries and functions. Unfortunately, there is no
                 optimization across these libraries, resulting in
                 performance penalties as high as an order of magnitude
                 in many applications. To address this problem, we
                 proposed Weld, a common runtime for existing data
                 analytics libraries that performs key physical
                 optimizations such as pipelining under existing,
                 imperative library APIs. In this work, we further
                 develop the Weld vision by designing an automatic
                 adaptive optimizer for Weld applications, and
                 evaluating its impact on realistic data science
                 workloads. Our optimizer eliminates multiple forms of
                 overhead that arise when composing imperative libraries
                 like Pandas and NumPy, and uses lightweight
                 measurements to make data-dependent decisions at
                 run-time in ad-hoc workloads where no statistics are
                 available, with sub-second overhead. We also evaluate
                 which optimizations have the largest impact in practice
                 and whether Weld can be integrated into libraries
                 incrementally. Our results are promising: using our
                 optimizer, Weld accelerates data science workloads by
                 up to 23X on one thread and 80X on eight threads, and
                 its adaptive optimizations provide up to a 3.75X
                 speedup over rule-based optimization. Moreover, Weld
                 provides benefits if even just 4--5 operators in a
                 library are ported to use it. Our results show that
                 common runtime designs like Weld may be a viable
                 approach to accelerate analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Muller:2018:ISE,
  author =       "Magnus M{\"u}ller and Guido Moerkotte and Oliver
                 Kolb",
  title =        "Improved selectivity estimation by combining knowledge
                 from sampling and synopses",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "1016--1028",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213882",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Estimating selectivities remains a critical task in
                 query processing. Optimizers rely on the accuracy of
                 selectivities when generating execution plans and, in
                 approximate query answering, estimated selectivities
                 affect the quality of the result. Many systems maintain
                 synopses, e.g., histograms, and, in addition, provide
                 sampling facilities. In this paper, we present a novel
                 approach to combine knowledge from synopses and
                 sampling for the purpose of selectivity estimation for
                 conjunctive queries. We first show how to extract
                 information from synopses and sampling such that they
                 are mutually consistent. In a second step, we show how
                 to combine them and decide on an admissible selectivity
                 estimate. We compare our approach to state-of-the-art
                 methods and evaluate the strengths and limitations of
                 each approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2018:EAA,
  author =       "Kai Han and Keke Huang and Xiaokui Xiao and Jing Tang
                 and Aixin Sun and Xueyan Tang",
  title =        "Efficient algorithms for adaptive influence
                 maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "1029--1040",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213883",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a social network $G$, the influence maximization
                 (IM) problem seeks a set $S$ of $k$ seed nodes in $G$
                 to maximize the expected number of nodes activated via
                 an influence cascade starting from $S$. Although a lot
                 of algorithms have been proposed for IM, most of them
                 only work under the non-adaptive setting, i.e., when
                 all $k$ seed nodes are selected before we observe how
                 they influence other users. In this paper, we study the
                 adaptive IM problem, where we select the $k$ seed nodes
                 in batches of equal size $b$, such that the choice of
                 the $i$-th batch can be made after the influence
                 results of the first $ i - 1$ batches are observed. We
                 propose the first practical algorithms for adaptive IM
                 with an approximation guarantee of $ 1 - \exp (\xi -
                 1)$ for $ b = 1$ and $ 1 - \exp (\xi - 1 + 1 / e)$ for
                 $ b > 1$, where $ \xi $ is any number in $ (0, 1)$. Our
                 approach is based on a novel AdaptGreedy framework
                 instantiated by non-adaptive IM algorithms, and its
                 performance can be substantially improved if the
                 non-adaptive IM algorithm has a small expected
                 approximation error. However, no current non-adaptive
                 IM algorithms provide such a desired property.
                 Therefore, we further propose a non-adaptive IM
                 algorithm called EPIC, which not only has the same
                 worst-case performance bounds with that of the
                 state-of-the-art non-adaptive IM algorithms, but also
                 has a reduced expected approximation error. We also
                 provide a theoretical analysis to quantify the
                 performance gain brought by instantiating AdaptGreedy
                 using EPIC, compared with a naive approach using the
                 existing IM algorithms. Finally, we use real social
                 networks to evaluate the performance of our approach
                  through extensive experiments, and the experimental
                  results strongly corroborate the superiorities of
                 our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Breslow:2018:MFF,
  author =       "Alex D. Breslow and Nuwan S. Jayasena",
  title =        "{Morton} filters: faster, space-efficient cuckoo
                 filters via biasing, compression, and decoupled logical
                 sparsity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "1041--1055",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213884",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Approximate set membership data structures (ASMDSs)
                 are ubiquitous in computing. They trade a tunable,
                 often small, error rate ($ \epsilon $) for large space
                 savings. The canonical ASMDS is the Bloom filter, which
                 supports lookups and insertions but not deletions in
                 its simplest form. Cuckoo filters (CFs), a recently
                 proposed class of ASMDSs, add deletion support and
                 often use fewer bits per item for equal $ \epsilon $.
                 This work introduces the Morton filter (MF), a novel
                 ASMDS that introduces several key improvements to CFs.
                 Like CFs, MFs support lookups, insertions, and
                 deletions, but improve their respective throughputs by
                 1.3x to 2.5x, 0.9x to 15.5x, and 1.3x to 1.6x. MFs
                 achieve these improvements by (1) introducing a
                 compressed format that permits a logically sparse
                 filter to be stored compactly in memory, (2) leveraging
                 succinct embedded metadata to prune unnecessary memory
                 accesses, and (3) heavily biasing insertions to use a
                 single hash function. With these optimizations,
                 lookups, insertions, and deletions often only require
                 accessing a single hardware cache line from the filter.
                 These improvements are not at a loss in space
                 efficiency, as MFs typically use comparable to slightly
                 less space than CFs for the same $ \epsilon $.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bi:2018:OPA,
  author =       "Fei Bi and Lijun Chang and Xuemin Lin and Wenjie
                 Zhang",
  title =        "An optimal and progressive approach to online search
                 of top-$k$ influential communities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "9",
  pages =        "1056--1068",
  month =        may,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3213880.3213881",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Jun 30 09:26:43 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Community search over large graphs is a fundamental
                 problem in graph analysis. Recent studies propose to
                 compute top-$k$ influential communities, where each
                 reported community not only is a cohesive subgraph but
                 also has a high influence value. The existing
                 approaches to the problem of top-$k$ influential
                 community search can be categorized as index-based
                 algorithms and online search algorithms without
                 indexes. The index-based algorithms, although being
                 very efficient in conducting community searches, need
                 to pre-compute a special-purpose index and only work
                 for one built-in vertex weight vector. In this paper,
                 we investigate online search approaches and propose an
                 instance-optimal algorithm LocalSearch whose time
                 complexity is linearly proportional to the size of the
                 smallest subgraph that a correct algorithm needs to
                 access without indexes. In addition, we also propose
                 techniques to make LocalSearch progressively compute
                 and report the communities in decreasing influence
                 value order such that $k$ does not need to be specified.
                 Moreover, we extend our framework to the general case
                 of top-$k$ influential community search regarding other
                 cohesiveness measures. Extensive empirical studies on
                 real graphs demonstrate that our algorithms outperform
                 the existing online search algorithms by several orders
                 of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Meister:2018:EAT,
  author =       "Andreas Meister and Guido Moerkotte and Gunter Saake",
  title =        "Errata for {``Analysis of two existing and one new
                 dynamic programming algorithm for the generation of
                 optimal bushy join trees without cross products''}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1069--1070",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231756",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the published version of EnumerateCmp in the
                 Section 3.3 on Page 936 [1], see also Algorithm 1, a
                 small error is included in Line 5. In the first call of
                 EnumerateCsgRec, too many nodes $ (X \cup N) $ will be
                 excluded for the emission of complements, leading to
                 the fact that, in general, not all complements will be
                 emitted correctly.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Park:2018:DSB,
  author =       "Noseong Park and Mahmoud Mohammadi and Kshitij Gorde
                 and Sushil Jajodia and Hongkyu Park and Youngmin Kim",
  title =        "Data synthesis based on generative adversarial
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1071--1083",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231757",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Privacy is an important concern for our society where
                 sharing data with partners or releasing data to the
                 public is a frequent occurrence. Some of the techniques
                 that are being used to achieve privacy are to remove
                 identifiers, alter quasi-identifiers, and perturb
                 values. Unfortunately, these approaches suffer from two
                 limitations. First, it has been shown that private
                 information can still be leaked if attackers possess
                 some background knowledge or other information sources.
                 Second, they do not take into account the adverse
                 impact these methods will have on the utility of the
                 released data. In this paper, we propose a method that
                 meets both requirements. Our method, called table-GAN,
                 uses generative adversarial networks (GANs) to
                 synthesize fake tables that are statistically similar
                 to the original table yet do not incur information
                 leakage. We show that the machine learning models
                 trained using our synthetic tables exhibit performance
                 that is similar to that of models trained using the
                 original table for unknown testing cases. We call this
                 property model compatibility. We believe that
                 anonymization/perturbation/synthesis methods without
                 model compatibility are of little value. We used four
                 real-world datasets from four different domains for our
                 experiments and conducted in-depth comparisons with
                 state-of-the-art anonymization, perturbation, and
                 generation techniques. Throughout our experiments, only
                 our method consistently shows balance between privacy
                 level and model compatibility.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lockard:2018:CDS,
  author =       "Colin Lockard and Xin Luna Dong and Arash Einolghozati
                 and Prashant Shiralkar",
  title =        "{CERES}: distantly supervised relation extraction from
                 the semi-structured web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1084--1096",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231758",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The web contains countless semi-structured websites,
                 which can be a rich source of information for
                 populating knowledge bases. Existing methods for
                 extracting relations from the DOM trees of
                 semi-structured webpages can achieve high precision and
                 recall only when manual annotations for each website
                 are available. Although there have been efforts to
                 learn extractors from automatically generated labels,
                 these methods are not sufficiently robust to succeed in
                 settings with complex schemas and information-rich
                 websites. In this paper we present a new method for
                 automatic extraction from semi-structured websites
                 based on distant supervision. We automatically generate
                 training labels by aligning an existing knowledge base
                 with a website and leveraging the unique structural
                 characteristics of semi-structured websites. We then
                 train a classifier based on the potentially noisy and
                 incomplete labels to predict new relation instances.
                 Our method can compete with annotation-based techniques
                 in the literature in terms of extraction quality. A
                 large-scale experiment on over 400,000 pages from
                 dozens of multi-lingual long-tail websites harvested
                 1.25 million facts at a precision of 90\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nazi:2018:EEI,
  author =       "Azade Nazi and Bolin Ding and Vivek Narasayya and
                 Surajit Chaudhuri",
  title =        "Efficient estimation of inclusion coefficient using
                 hyperloglog sketches",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1097--1109",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231759",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Efficiently estimating the inclusion coefficient ---
                 the fraction of values of one column that are contained
                 in another column --- is useful for tasks such as data
                 profiling and foreign-key detection. We present a new
                 estimator, BML, for inclusion coefficient based on
                 Hyperloglog sketches that results in significantly
                 lower error compared to the state-of-the-art approach
                 that uses Bottom-k sketches. We evaluate the error of
                 the BML estimator using experiments on industry
                 benchmarks such as TPC-H and TPC-DS, and several
                 real-world databases. As an independent contribution,
                 we show how Hyperloglog sketches can be maintained
                 incrementally with data deletions using only a constant
                 amount of additional memory.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fier:2018:SSJ,
  author =       "Fabian Fier and Nikolaus Augsten and Panagiotis Bouros
                 and Ulf Leser and Johann-Christoph Freytag",
  title =        "Set similarity joins on {MapReduce}: an experimental
                 survey",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1110--1122",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231760",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Set similarity joins, which compute pairs of similar
                 sets, constitute an important operator primitive in a
                 variety of applications, including applications that
                 must process large amounts of data. To handle these
                 data volumes, several distributed set similarity join
                 algorithms have been proposed. Unfortunately, little is
                 known about the relative performance, strengths and
                 weaknesses of these techniques. Previous comparisons
                 are limited to a small subset of relevant algorithms,
                 and the large differences in the various test setups
                 make it hard to draw overall conclusions. In this paper
                 we survey ten recent, distributed set similarity join
                 algorithms, all based on the MapReduce paradigm. We
                 empirically compare the algorithms in a uniform test
                 environment on twelve datasets that expose different
                 characteristics and represent a broad range of
                 applications. Our experiments yield a surprising
                 result: All algorithms in our test fail to scale for at
                 least one dataset and are sensitive to long sets,
                 frequent set elements, low similarity thresholds, or a
                 combination thereof. Interestingly, some algorithms
                 even fail to handle the small datasets that can easily
                 be processed in a non-distributed setting. Our analytic
                 investigation of the algorithms pinpoints the reasons
                 for the poor performance and targeted experiments
                 confirm our analytic findings. Based on our
                 investigation, we suggest directions for future
                 research in the area.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2018:PSH,
  author =       "Bailu Ding and Sudipto Das and Wentao Wu and Surajit
                 Chaudhuri and Vivek Narasayya",
  title =        "{Plan Stitch}: harnessing the best of many plans",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1123--1136",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231761",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query performance regression due to the query
                 optimizer selecting a bad query execution plan is a
                 major pain point in production workloads. Commercial
                 DBMSs today can automatically detect and correct such
                 query plan regressions by storing previously-executed
                 plans and reverting to a previous plan which is still
                 valid and has the least execution cost. Such
                 reversion-based plan correction has relatively low risk
                 of plan regression since the decision is based on
                 observed execution costs. However, this approach
                 ignores potentially valuable information of efficient
                 subplans collected from other previously-executed
                 plans. In this paper, we propose a novel technique,
                 Plan Stitch, that automatically and opportunistically
                 combines efficient subplans of previously-executed
                 plans into a valid new plan, which can be cheaper than
                 any individual previously-executed plan. We implement
                 Plan Stitch on top of Microsoft SQL Server. Our
                 experiments on TPC-DS benchmark and three real-world
                 customer workloads show that plans obtained via Plan
                 Stitch can reduce execution cost significantly, with a
                 reduction of up to two orders of magnitude in execution
                 cost when compared to reverting to the cheapest
                 previously-executed plan.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2018:FES,
  author =       "Sheng Wang and Tien Tuan Anh Dinh and Qian Lin and
                 Zhongle Xie and Meihui Zhang and Qingchao Cai and Gang
                 Chen and Beng Chin Ooi and Pingcheng Ruan",
  title =        "{Forkbase}: an efficient storage engine for blockchain
                 and forkable applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1137--1150",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231762",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Existing data storage systems offer a wide range of
                 functionalities to accommodate an equally diverse range
                 of applications. However, new classes of applications
                 have emerged, e.g., blockchain and collaborative
                 analytics, featuring data versioning, fork semantics,
                 tamper-evidence or any combination thereof. They
                 present new opportunities for storage systems to
                 efficiently support such applications by embedding the
                 above requirements into the storage. In this paper, we
                 present ForkBase, a storage engine designed for
                 blockchain and forkable applications. By integrating
                 core application properties into the storage, ForkBase
                 not only delivers high performance but also reduces
                 development effort. The storage manages multiversion
                 data and supports two variants of fork semantics which
                 enable different fork workflows. ForkBase is fast and
                 space efficient, due to a novel index class that
                 supports efficient queries as well as effective
                 detection of duplicate content across data objects,
                 branches and versions. We demonstrate ForkBase's
                 performance using three applications: a blockchain
                 platform, a wiki engine and a collaborative analytics
                 application. We conduct extensive experimental
                 evaluation against respective state-of-the-art
                 solutions. The results show that ForkBase achieves
                 superior performance while significantly lowering the
                 development effort.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ammar:2018:EAD,
  author =       "Khaled Ammar and M. Tamer {\"O}zsu",
  title =        "Experimental analysis of distributed graph systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1151--1164",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231764",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper evaluates eight parallel graph processing
                 systems: Hadoop, HaLoop, Vertica, Giraph, GraphLab
                 (PowerGraph), Blogel, Flink Gelly, and GraphX (SPARK)
                 over four very large datasets (Twitter, World Road
                 Network, UK 200705, and ClueWeb) using four workloads
                 (PageRank, WCC, SSSP and K-hop). The main objective is
                 to perform an independent scale-out study by
                 experimentally analyzing the performance, usability,
                 and scalability (using up to 128 machines) of these
                 systems. In addition to performance results, we discuss
                 our experiences in using these systems and suggest some
                 system tuning heuristics that lead to better
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{He:2018:TDE,
  author =       "Yeye He and Xu Chu and Kris Ganjam and Yudian Zheng
                 and Vivek Narasayya and Surajit Chaudhuri",
  title =        "Transform-data-by-example {(TDE)}: an extensible
                 search engine for data transformations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1165--1177",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231766",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today, business analysts and data scientists
                 increasingly need to clean, standardize and transform
                 diverse data sets, such as name, address, date time,
                 and phone number, before they can perform analysis.
                 This process of data transformation is an important
                 part of data preparation, and is known to be difficult
                 and time-consuming for end-users. Traditionally,
                 developers have dealt with these longstanding
                 transformation problems using custom code libraries.
                 They have built vast varieties of custom logic for name
                 parsing and address standardization, etc., and shared
                 their source code in places like GitHub. Data
                 transformation would be a lot easier for end-users if
                 they can discover and reuse such existing
                 transformation logic. We developed
                 Transform-Data-by-Example (TDE), which works like a
                 search engine for data transformations. TDE ``indexes''
                 vast varieties of transformation logic in source code,
                 DLLs, web services and mapping tables, so that users
                 only need to provide a few input/output examples to
                 demonstrate a desired transformation, and TDE can
                 interactively find relevant functions to synthesize new
                 programs consistent with all examples. Using an index
                 of 50K functions crawled from GitHub and Stackoverflow,
                 TDE can already handle many common transformations not
                 currently supported by existing systems. On a benchmark
                 with over 200 transformation tasks, TDE generates
                 correct transformations for 72\% tasks, which is
                 considerably better than other systems evaluated. A
                 beta version of TDE for Microsoft Excel is available
                 via Office store. Part of the TDE technology also ships
                 in Microsoft Power BI.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{OKeeffe:2018:FRE,
  author =       "Dan O'Keeffe and Theodoros Salonidis and Peter
                 Pietzuch",
  title =        "{Frontier}: resilient edge processing for the
                 {Internet of Things}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1178--1191",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231767",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In an edge deployment model, Internet-of-Things (IoT)
                 applications, e.g. for building automation or video
                 surveillance, must process data locally on IoT devices
                 without relying on permanent connectivity to a cloud
                 backend. The ability to harness the combined resources
                 of multiple IoT devices for computation is influenced
                 by the quality of wireless network connectivity. An
                 open challenge is how practical edge-based IoT
                 applications can be realised that are robust to changes
                 in network bandwidth between IoT devices, due to
                 interference and intermittent connectivity. We present
                 Frontier, a distributed and resilient edge processing
                 platform for IoT devices. The key idea is to express
                 data-intensive IoT applications as continuous
                 data-parallel streaming queries and to improve query
                 throughput in an unreliable wireless network by
                 exploiting network path diversity: a query includes
                 operator replicas at different IoT nodes, which
                 increases possible network paths for data. Frontier
                 dynamically routes stream data to operator replicas
                 based on network path conditions. Nodes probe path
                 throughput and use backpressure stream routing to
                 decide on transmission rates, while exploiting multiple
                 operator replicas for data-parallelism. If a node loses
                 network connectivity, a transient disconnection
                 recovery mechanism reprocesses the lost data. Our
                 experimental evaluation of Frontier shows that network
                 path diversity improves throughput by $ 1.3 \times $--$
                 2.8 \times $ for different IoT applications, while
                 being resilient to intermittent network connectivity.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Haynes:2018:LDV,
  author =       "Brandon Haynes and Amrita Mazumdar and Armin Alaghi
                 and Magdalena Balazinska and Luis Ceze and Alvin
                 Cheung",
  title =        "{LightDB}: a {DBMS} for virtual reality video",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1192--1205",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231768",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present the data model, architecture, and
                 evaluation of LightDB, a database management system
                 designed to efficiently manage virtual, augmented, and
                 mixed reality (VAMR) video content. VAMR video differs
                 from its two-dimensional counterpart in that it is
                 spherical with periodic angular dimensions, is
                 nonuniformly and continuously sampled, and applications
                 that consume such videos often have demanding latency
                 and throughput requirements. To address these
                 challenges, LightDB treats VAMR video data as a
                 logically-continuous six-dimensional light field.
                 Furthermore, LightDB supports a rich set of operations
                 over light fields, and automatically transforms
                 declarative queries into executable physical plans. We
                 have implemented a prototype of LightDB and, through
                 experiments with VAMR applications in the literature,
                 we find that LightDB offers up to $ 4 \times $
                 throughput improvements compared with prior work.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{McKenna:2018:OEH,
  author =       "Ryan McKenna and Gerome Miklau and Michael Hay and
                 Ashwin Machanavajjhala",
  title =        "Optimizing error of high-dimensional statistical
                 queries under differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1206--1219",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231769",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differentially private algorithms for answering sets
                 of predicate counting queries on a sensitive database
                 have many applications. Organizations that collect
                 individual-level data, such as statistical agencies and
                 medical institutions, use them to safely release
                 summary tabulations. However, existing techniques are
                 accurate only on a narrow class of query workloads, or
                 are extremely slow, especially when analyzing more than
                 one or two dimensions of the data. In this work we
                 propose HDMM, a new differentially private algorithm
                 for answering a workload of predicate counting queries,
                 that is especially effective for higher-dimensional
                 datasets. HDMM represents query workloads using an
                 implicit matrix representation and exploits this
                 compact representation to efficiently search (a subset
                 of) the space of differentially private algorithms for
                 one that answers the input query workload with high
                 accuracy. We empirically show that HDMM can efficiently
                 answer queries with lower error than state-of-the-art
                 techniques on a variety of low and high dimensional
                 datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Liu:2018:MBM,
  author =       "Yu Liu and Hantian Zhang and Luyuan Zeng and Wentao Wu
                 and Ce Zhang",
  title =        "{MLbench}: benchmarking machine learning services
                 against human experts",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1220--1232",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231770",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern machine learning services and systems are
                 complicated data systems --- the process of designing
                 such systems is an art of compromising between
                 functionality, performance, and quality. Providing
                 different levels of system supports for different
                 functionalities, such as automatic feature engineering,
                 model selection and ensemble, and hyperparameter
                 tuning, could improve the quality, but also introduce
                 additional cost and system complexity. In this paper,
                 we try to facilitate the process of asking the
                 following type of questions: How much will the users
                 lose if we remove the support of functionality x from a
                 machine learning service? Answering this type of
                 questions using existing datasets, such as the UCI
                 datasets, is challenging. The main contribution of this
                 work is a novel dataset, MLBench, harvested from Kaggle
                 competitions. Unlike existing datasets, MLBench
                 contains not only the raw features for a machine
                 learning task, but also those used by the winning teams
                 of Kaggle competitions. The winning features serve as a
                 baseline of best human effort that enables multiple
                 ways to measure the quality of machine learning
                 services that cannot be supported by existing datasets,
                 such as relative ranking on Kaggle and relative
                 accuracy compared with best-effort systems. We then
                 conduct an empirical study using MLBench to understand
                 example machine learning services from Amazon and
                 Microsoft Azure, and showcase how MLBench enables a
                 comparative study revealing the strength and weakness
                 of these existing machine learning services
                 quantitatively and systematically. The full version of
                 this paper can be found at {\tt
                  arxiv.org/abs/1707.09562}.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2018:MCL,
  author =       "Lu Chen and Chengfei Liu and Rui Zhou and Jianxin Li
                 and Xiaochun Yang and Bin Wang",
  title =        "Maximum co-located community search in large scale
                 social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1233--1246",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231755",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The problem of k-truss search has been well defined
                 and investigated to find the highly correlated user
                 groups in social networks. But there is no previous
                 study to consider the constraint of users' spatial
                 information in k-truss search, denoted as co-located
                 community search in this paper. The co-located
                 community can serve many real applications. To search
                 the maximum co-located communities efficiently, we
                 first develop an efficient exact algorithm with several
                 pruning techniques. After that, we further develop an
                 approximation algorithm with adjustable accuracy
                 guarantees and explore more effective pruning rules,
                 which can reduce the computational cost significantly.
                 To accelerate the real-time efficiency, we also devise
                 a novel quadtree based index to support the efficient
                 retrieval of users in a region and optimise the search
                 regions with regards to the given query region.
                 Finally, we verify the performance of our proposed
                 algorithms and index using five real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zalipynis:2018:CDF,
  author =       "Ramon Antonio Rodriges Zalipynis",
  title =        "{ChronosDB}: distributed, file based, geospatial array
                 {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1247--1261",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231754",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "An array DBMS streamlines large N-d array management.
                 A large portion of such arrays originates from the
                 geospatial domain. The arrays often natively come as
                 raster files while standalone command line tools are
                 one of the most popular ways for processing these
                 files. Decades of development and feedback resulted in
                 numerous feature-rich, elaborate, free and
                 quality-assured tools optimized mostly for a single
                 machine. ChronosDB partially delegates in situ data
                 processing to such tools and offers a formal N-d array
                 data model to abstract from the files and the tools.
                 ChronosDB readily provides a rich collection of array
                 operations at scale and outperforms SciDB by up to $ 75
                 \times $ on average.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Macke:2018:ASR,
  author =       "Stephen Macke and Yiming Zhang and Silu Huang and
                 Aditya Parameswaran",
  title =        "Adaptive sampling for rapidly matching histograms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1262--1275",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231753",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In exploratory data analysis, analysts often have a
                 need to identify histograms that possess a specific
                 distribution, among a large class of candidate
                 histograms, e.g., find countries whose income
                 distribution is most similar to that of Greece. This
                 distribution could be a new one that the user is
                 curious about, or a known distribution from an existing
                 histogram visualization. At present, this process of
                 identification is brute-force, requiring the manual
                 generation and evaluation of a large number of
                 histograms. We present FastMatch: an end-to-end
                 approach for interactively retrieving the histogram
                 visualizations most similar to a user-specified target,
                 from a large collection of histograms. The primary
                 technical contribution underlying FastMatch is a
                 probabilistic algorithm, HistSim, a theoretically sound
                  sampling-based approach to identify the top-k closest
                 histograms under $ l_1 $ distance. While HistSim can be
                 used independently, within FastMatch we couple HistSim
                 with a novel system architecture that is aware of
                 practical considerations, employing asynchronous
                 block-based sampling policies. FastMatch obtains
                 near-perfect accuracy with up to $ 35 \times $ speedup
                 over approaches that do not use sampling on several
                 real-world datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Asudeh:2018:LSJ,
  author =       "Abolfazl Asudeh and Azade Nazi and Jees Augustine and
                 Saravanan Thirumuruganathan and Nan Zhang and Gautam
                 Das and Divesh Srivastava",
  title =        "Leveraging similarity joins for signal
                 reconstruction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1276--1288",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231752",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Signal reconstruction problem (SRP) is an important
                 optimization problem where the objective is to identify
                 a solution to an underdetermined system of linear
                 equations that is closest to a given prior. It has a
                 substantial number of applications in diverse areas
                 including network traffic engineering, medical image
                 reconstruction, acoustics, astronomy and many more.
                 Most common approaches for SRP do not scale to large
                 problem sizes. In this paper, we propose a dual
                 formulation of this problem and show how adapting
                 database techniques developed for scalable similarity
                 joins provides a significant speedup. Extensive
                 experiments on real-world and synthetic data show that
                 our approach produces a significant speedup of up to
                  $ 20 \times $ over competing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yu:2018:SHC,
  author =       "Xiangyao Yu and Yu Xia and Andrew Pavlo and Daniel
                 Sanchez and Larry Rudolph and Srinivas Devadas",
  title =        "{Sundial}: harmonizing concurrency control and caching
                 in a distributed {OLTP} database management system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1289--1302",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231763",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed transactions suffer from poor performance
                 due to two major limiting factors. First, distributed
                 transactions suffer from high latency because each of
                 their accesses to remote data incurs a long network
                 delay. Second, this high latency increases the
                 likelihood of contention among distributed
                 transactions, leading to high abort rates and low
                 performance. We present Sundial, an in-memory
                 distributed optimistic concurrency control protocol
                 that addresses these two limitations. First, to reduce
                 the transaction abort rate, Sundial dynamically
                 determines the logical order among transactions at
                 runtime, based on their data access patterns. Sundial
                 achieves this by applying logical leases to each data
                 element, which allows the database to dynamically
                 calculate a transaction's logical commit timestamp.
                 Second, to reduce the overhead of remote data accesses,
                 Sundial allows the database to cache remote data in a
                 server's local main memory and maintains cache
                 coherence. With logical leases, Sundial integrates
                 concurrency control and cache coherence into a simple
                 unified protocol. We evaluate Sundial against
                 state-of-the-art distributed concurrency control
                 protocols. Sundial outperforms the next-best protocol
                 by up to 57\% under high contention. Sundial's caching
                 scheme improves performance by up to $ 4.6 \times $ in
                 workloads with high access skew.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mai:2018:CSP,
  author =       "Luo Mai and Kai Zeng and Rahul Potharaju and Le Xu and
                 Steve Suh and Shivaram Venkataraman and Paolo Costa and
                 Terry Kim and Saravanan Muthukrishnan and Vamsi Kuppa
                 and Sudheer Dhulipalla and Sriram Rao",
  title =        "{Chi}: a scalable and programmable control plane for
                 distributed stream processing systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "10",
  pages =        "1303--1316",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3231751.3231765",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 26 16:31:24 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream-processing workloads and modern shared cluster
                 environments exhibit high variability and
                 unpredictability. Combined with the large parameter
                 space and the diverse set of user SLOs, this makes
                 modern streaming systems very challenging to statically
                 configure and tune. To address these issues, in this
                 paper we investigate a novel control-plane design, Chi,
                 which supports continuous monitoring and feedback, and
                 enables dynamic re-configuration. Chi leverages the key
                 insight of embedding control-plane messages in the
                 data-plane channels to achieve a low-latency and
                 flexible control plane for stream-processing systems.
                 Chi introduces a new reactive programming model and
                 design mechanisms to asynchronously execute control
                 policies, thus avoiding global synchronization. We show
                 how this allows us to easily implement a wide spectrum
                 of control policies targeting different use cases
                 observed in production. Large-scale experiments using
                 production workloads from a popular cloud provider
                 demonstrate the flexibility and efficiency of our
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Thomas:2018:CES,
  author =       "Anthony Thomas and Arun Kumar",
  title =        "A comparative evaluation of systems for scalable
                 linear algebra-based analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2168--2182",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275367",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The growing use of statistical and machine learning
                 (ML) algorithms to analyze large datasets has given
                 rise to new systems to scale such algorithms. But
                 implementing new scalable algorithms in low-level
                 languages is a painful process, especially for
                 enterprise and scientific users. To mitigate this
                 issue, a new breed of systems expose high-level bulk
                 linear algebra (LA) primitives that are scalable. By
                 composing such LA primitives, users can write analysis
                 algorithms in a higher-level language, while the system
                 handles scalability issues. But there is little work on
                 a unified comparative evaluation of the scalability,
                 efficiency, and effectiveness of such ``scalable LA
                 systems.'' We take a major step towards filling this
                 gap. We introduce a suite of LA-specific tests based on
                 our analysis of the data access and communication
                 patterns of LA workloads and their use cases. Using our
                 tests, we perform a comprehensive empirical comparison
                 of a few popular scalable LA systems: MADlib, MLlib,
                 SystemML, ScaLAPACK, SciDB, and TensorFlow using both
                 synthetic data and a large real-world dataset. Our
                 study has revealed several scalability bottlenecks,
                 unusual performance trends, and even bugs in some
                 systems. Our findings have already led to improvements
                 in SystemML, with other systems' developers also
                 expressing interest. All of our code and data scripts
                 are available for download at
                 https://adalabucsd.github.io/slab.html.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Karthik:2018:CPL,
  author =       "Srinivas Karthik and Jayant R. Haritsa and Sreyash
                 Kenkre and Vinayaka Pandit",
  title =        "A concave path to low-overhead robust query
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2183--2195",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275368",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To address the classical selectivity estimation
                 problem in database systems, a radically different
                 query processing technique called PlanBouquet was
                 proposed in 2014. In this approach, the estimation
                 process is completely abandoned and replaced with a
                 calibrated selectivity discovery mechanism. The
                 beneficial outcome is that provable guarantees are
                 obtained on worst-case execution performance, thereby
                 facilitating robust query processing. An improved
                 version of PlanBouquet, called SpillBound (SB), which
                 significantly accelerates the selectivity discovery
                 process, and provides platform-independent performance
                 guarantees, was presented two years ago.
                 Notwithstanding its benefits, a limitation of
                 SpillBound is that its guarantees are predicated on
                 expending enormous preprocessing efforts during query
                 compilation, making it suitable only for canned queries
                 that are invoked repeatedly. In this paper, we address
                 this limitation by leveraging the fact that plan cost
                 functions typically exhibit concave down behavior with
                 regard to predicate selectivities. Specifically, we
                 design FrugalSpillBound, which provably achieves
                 extremely attractive tradeoffs between the performance
                 guarantees and the compilation overheads. For instance,
                 relaxing the performance guarantee by a factor of two
                 typically results in at least two orders of magnitude
                 reduction in the overheads. Further, when empirically
                 evaluated on benchmark OLAP queries, the decrease in
                 overheads is even greater, often more than three orders
                 of magnitude. Therefore, FrugalSpillBound substantively
                 extends robust query processing towards supporting
                 ad-hoc queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wen:2018:ISE,
  author =       "Yuhao Wen and Xiaodan Zhu and Sudeepa Roy and Jun
                 Yang",
  title =        "Interactive summarization and exploration of top
                 aggregate query answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2196--2208",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275369",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present a system for summarization and interactive
                 exploration of high-valued aggregate query answers to
                 make a large set of possible answers more informative
                 to the user. Our system outputs a set of clusters on
                 the high-valued query answers showing their common
                 properties such that the clusters are diverse as much
                 as possible to avoid repeating information, and cover a
                 certain number of top original answers as indicated by
                 the user. Further, the system facilitates interactive
                 exploration of the query answers by helping the user
                 (i) choose combinations of parameters for clustering,
                 (ii) inspect the clusters as well as the elements they
                 contain, and (iii) visualize how changes in parameters
                 affect clustering. We define optimization problems,
                 study their complexity, explore properties of the
                 solutions investigating the semi-lattice structure on
                 the clusters, and propose efficient algorithms and
                 optimizations to achieve these goals. We evaluate our
                 techniques experimentally and discuss our prototype
                 with a graphical user interface that facilitates this
                 interactive exploration. A user study is conducted to
                 evaluate the usability of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kersten:2018:EYA,
  author =       "Timo Kersten and Viktor Leis and Alfons Kemper and
                 Thomas Neumann and Andrew Pavlo and Peter Boncz",
  title =        "Everything you always wanted to know about compiled
                 and vectorized queries but were afraid to ask",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2209--2222",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275370",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The query engines of most modern database systems are
                 either based on vectorization or data-centric code
                 generation. These two state-of-the-art query processing
                 paradigms are fundamentally different in terms of
                 system structure and query execution code. Both
                 paradigms were used to build fast systems. However,
                 until today it is not clear which paradigm yields
                 faster query execution, as many implementation-specific
                 choices obstruct a direct comparison of architectures.
                 In this paper, we experimentally compare the two models
                 by implementing both within the same test system. This
                 allows us to use for both models the same query
                 processing algorithms, the same data structures, and
                 the same parallelization framework to ultimately create
                 an apples-to-apples comparison. We find that both are
                 efficient, but have different strengths and weaknesses.
                 Vectorization is better at hiding cache miss latency,
                 whereas data-centric compilation requires fewer CPU
                 instructions, which benefits cache-resident workloads.
                 Besides raw, single-threaded performance, we also
                 investigate SIMD as well as multi-core parallelization
                 and different hardware architectures. Finally, we
                 analyze qualitative differences as a guide for system
                 architects.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gao:2018:DTK,
  author =       "Junyang Gao and Pankaj K. Agarwal and Jun Yang",
  title =        "Durable top-$k$ queries on temporal data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2223--2235",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275371",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many datasets have a temporal dimension and contain a
                 wealth of historical information. When using such data
                 to make decisions, we often want to examine not only
                 the current snapshot of the data but also its history.
                 For example, given a result object of a snapshot query,
                 we can ask for its ``durability,'' or intuitively, how
                 long (or how often) it was valid in the past. This
                 paper considers durable top-k queries, which look for
                 objects whose values were among the top k for at least
                 some fraction of the times during a given
                 interval---e.g., stocks that were among the top 20 most
                 heavily traded for at least 80\% of the trading days
                 during the last quarter of 2017. We present a
                 comprehensive suite of techniques for solving this
                 problem, ranging from exact algorithms where k is fixed
                 in advance, to approximate methods that work for any k
                 and are able to exploit workload and data
                 characteristics to improve accuracy while capping index
                 cost. We show that our methods vastly outperform
                 baseline and previous methods using both real and
                 synthetic datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Linardi:2018:SVL,
  author =       "Michele Linardi and Themis Palpanas",
  title =        "Scalable, variable-length similarity search in data
                 series: the {ULISSE} approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2236--2248",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275372",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data series similarity search is an important
                 operation and at the core of several analysis tasks and
                 applications related to data series collections.
                 Despite the fact that data series indexes enable fast
                 similarity search, all existing indexes can only answer
                 queries of a single length (fixed at index construction
                 time), which is a severe limitation. In this work, we
                 propose ULISSE, the first data series index structure
                 designed for answering similarity search queries of
                 variable length. Our contribution is two-fold. First,
                 we introduce a novel representation technique, which
                 effectively and succinctly summarizes multiple
                 sequences of different length (irrespective of
                 Z-normalization). Based on the proposed index, we
                 describe efficient algorithms for approximate and exact
                 similarity search, combining disk based index visits
                 and in-memory sequential scans. We experimentally
                 evaluate our approach using several synthetic and real
                 datasets. The results show that ULISSE is several times
                 (and up to orders of magnitude) more efficient in terms
                 of both space and time cost, when compared to competing
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sauer:2018:FLS,
  author =       "Caetano Sauer and Goetz Graefe and Theo H{\"a}rder",
  title =        "{FineLine}: log-structured transactional storage and
                 recovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2249--2262",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275373",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recovery is an intricate aspect of transaction
                 processing architectures. In its traditional
                 implementation, recovery requires the management of two
                 persistent data stores---a write-ahead log and a
                 materialized database---which must be carefully
                 orchestrated to maintain transactional consistency.
                 Furthermore, the design and implementation of recovery
                 algorithms have deep ramifications into almost every
                 component of the internal system architecture, from
                 concurrency control to buffer management and access
                 path implementation. Such complexity not only incurs
                 high costs for development, testing, and training, but
                 also unavoidably affects system performance,
                 introducing overheads and limiting scalability. This
                 paper proposes a novel approach for transactional
                 storage and recovery called FineLine. It simplifies the
                 implementation of transactional database systems by
                 eliminating the log-database duality and maintaining
                 all persistent data in a single, log-structured data
                 structure. This approach not only provides more
                 efficient recovery with less overhead, but also
                 decouples the management of persistent data from
                 in-memory access paths. As such, it blurs the lines
                 that separate in-memory from disk-based database
                 systems, providing the efficiency of the former with
                 the reliability of the latter.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rahman:2018:IMH,
  author =       "Protiva Rahman and Courtney Hebert and Arnab Nandi",
  title =        "{ICARUS}: minimizing human effort in iterative data
                 completion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "11",
  number =       "13",
  pages =        "2263--2276",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275366.3275374",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Oct 11 16:22:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "An important step in data preparation involves dealing
                 with incomplete datasets. In some cases, the missing
                 values are unreported because they are characteristics
                 of the domain and are known by practitioners. Due to
                 this nature of the missing values, imputation and
                 inference methods do not work and input from domain
                 experts is required. A common method for experts to
                 fill missing values is through rules. However, for
                 large datasets with thousands of missing data points,
                 it is laborious and time consuming for a user to make
                 sense of the data and formulate effective completion
                 rules. Thus, users need to be shown subsets of the data
                 that will have the most impact in completing missing
                 fields. Further, these subsets should provide the user
                 with enough information to make an update. Choosing
                 subsets that maximize the probability of filling in
                 missing data from a large dataset is computationally
                 expensive. To address these challenges, we present
                 Icarus, which uses a heuristic algorithm to show the
                 user small subsets of the database in the form of a
                 matrix. This allows the user to iteratively fill in
                 data by applying suggested rules based on their direct
                 edits to the matrix. The suggested rules amplify the
                 users' input to multiple missing fields by using the
                 database schema to infer hierarchies. Simulations show
                 Icarus has an average improvement of 50\% across three
                 datasets over the baseline system. Further, in-person
                 user studies demonstrate that naive users can fill in
                 68\% of missing data within an hour, while manual rule
                 specification spans weeks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kim:2018:LIW,
  author =       "Sunghwan Kim and Taesung Lee and Seung-won Hwang and
                 Sameh Elnikety",
  title =        "List intersection for web search: algorithms, cost
                 models, and optimizations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "1",
  pages =        "1--13",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275536.3275537",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:47 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper studies the optimization of list
                 intersection, especially in the context of the matching
                 phase of search engines. Given a user query, we
                 intersect the postings lists corresponding to the query
                 keywords to generate the list of documents matching all
                 keywords. Since the speed of list intersection depends
                 on the algorithm, hardware, and list lengths and their
                 correlations, none of the existing intersection
                 algorithms
                 outperforms the others in every scenario. Therefore, we
                 develop a cost-based approach in which we identify a
                 search space, spanning existing algorithms and their
                 combinations. We propose a cost model to estimate the
                 cost of the algorithms with their combinations, and use
                 the cost model to search for the lowest-cost algorithm.
                 The resulting plan is usually a combination of 2-way
                 algorithms, outperforming conventional 2-way and k-way
                 algorithms. The proposed approach is more general than
                 designing a specific algorithm, as the cost models can
                 be adapted to different hardware. We validate the cost
                 model experimentally on two different CPUs, and show
                 that the cost model closely estimates the actual cost.
                 Using both real and synthetic datasets, we show that
                 the proposed cost-based optimizer outperforms the
                 state-of-the-art alternatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Whittaker:2018:ICC,
  author =       "Michael Whittaker and Joseph M. Hellerstein",
  title =        "Interactive checks for coordination avoidance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "1",
  pages =        "14--27",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275536.3275538",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:47 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Strongly consistent distributed systems are easy to
                 reason about but face fundamental limitations in
                 availability and performance. Weakly consistent systems
                 can be implemented with very high performance but place
                 a burden on the application developer to reason about
                 complex interleavings of execution. Invariant
                 confluence provides a formal framework for
                 understanding when we can get the best of both worlds.
                 An invariant confluent object can be efficiently
                 replicated with no coordination needed to preserve its
                 invariants. However, actually determining whether or
                 not an object is invariant confluent is challenging. In
                 this paper, we establish conditions under which a
                 commonly used sufficient condition for invariant
                 confluence is both necessary and sufficient, and we use
                 this condition to design (a) a general-purpose
                 interactive invariant confluence decision procedure and
                 (b) a novel sufficient condition that can be checked
                 automatically. We then take a step beyond invariant
                 confluence and introduce a generalization of invariant
                 confluence, called segmented invariant confluence, that
                 allows us to replicate non-invariant confluent objects
                 with a small amount of coordination. We implemented
                 these formalisms in a prototype called Lucy and found
                 that our decision procedures efficiently handle common
                 real-world workloads including foreign keys, rollups,
                 escrow transactions, and more. We also found that
                 segmented invariant confluent replication can deliver
                 up to an order of magnitude more throughput than
                 linearizable replication for low contention workloads
                 and comparable throughput for medium to high contention
                 workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qin:2018:PPF,
  author =       "Jianbin Qin and Chuan Xiao",
  title =        "Pigeonring: a principle for faster thresholded
                 similarity search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "1",
  pages =        "28--42",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275536.3275539",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:47 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The pigeonhole principle states that if n items are
                 contained in m boxes, then at least one box has no more
                 than n/m items. It is utilized to solve many data
                 management problems, especially for thresholded
                 similarity searches. Despite many pigeonhole
                 principle-based solutions proposed in the last few
                 decades, the condition stated by the principle is weak.
                 It only constrains the number of items in a single box.
                 By organizing the boxes in a ring, we propose a new
                 principle, called the pigeonring principle, which
                 constrains the number of items in multiple boxes and
                 yields stronger conditions. To utilize the new
                 principle, we focus on problems defined in the form of
                 identifying data objects whose similarities or
                 distances to the query are constrained by a threshold.
                 Many solutions to these problems utilize the pigeonhole
                 principle to find candidates that satisfy a filtering
                 condition. By the new principle, stronger filtering
                 conditions can be established. We show that the
                 pigeonhole principle is a special case of the new
                 principle. This suggests that all the pigeonhole
                 principle-based solutions are possible to be
                 accelerated by the new principle. A universal filtering
                 framework is introduced to encompass the solutions to
                 these problems based on the new principle. Besides, we
                 discuss how to quickly find candidates specified by the
                 new principle. The implementation requires only minor
                 modifications on top of existing pigeonhole
                 principle-based algorithms. Experimental results on
                 real datasets demonstrate the applicability of the new
                 principle as well as the superior performance of the
                 algorithms based on the new principle.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sariyuce:2018:LAH,
  author =       "Ahmet Erdem Sariy{\"u}ce and C. Seshadhri and Ali
                 Pinar",
  title =        "Local algorithms for hierarchical dense subgraph
                 discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "1",
  pages =        "43--56",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275536.3275540",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:47 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Finding the dense regions of a graph and relations
                 among them is a fundamental problem in network
                 analysis. Core and truss decompositions reveal dense
                 subgraphs with hierarchical relations. The incremental
                 nature of algorithms for computing these decompositions
                 and the need for global information at each step of the
                 algorithm hinders scalable parallelization and
                 approximations since the densest regions are not
                 revealed until the end. In a previous work, Lu et al.
                 proposed to iteratively compute the h-indices of
                 neighbor vertex degrees to obtain the core numbers and
                 prove that the convergence is obtained after a finite
                 number of iterations. This work generalizes the
                 iterative h-index computation for truss decomposition
                 as well as nucleus decomposition which leverages
                 higher-order structures to generalize core and truss
                 decompositions. In addition, we prove convergence
                 bounds on the number of iterations. We present a
                 framework of local algorithms to obtain the core,
                 truss, and nucleus decompositions. Our algorithms are
                 local, parallel, offer high scalability, and enable
                 approximations to explore time and quality trade-offs.
                 Our shared-memory implementation verifies the
                 efficiency, scalability, and effectiveness of our local
                 algorithms on real-world networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2018:CED,
  author =       "Jingru Yang and Ju Fan and Zhewei Wei and Guoliang Li
                 and Tongyu Liu and Xiaoyong Du",
  title =        "Cost-effective data annotation using game-based
                 crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "1",
  pages =        "57--70",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275536.3275541",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:47 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large-scale data annotation is indispensable for many
                 applications, such as machine learning and data
                 integration. However, existing annotation solutions
                 either incur expensive cost for large datasets or
                 produce noisy results. This paper introduces a
                 cost-effective annotation approach, and focuses on the
                 labeling rule generation problem that aims to generate
                 high-quality rules to largely reduce the labeling cost
                 while preserving quality. To address the problem, we
                 first generate candidate rules, and then devise a
                 game-based crowdsourcing approach CROWDGAME to select
                 high-quality rules by considering coverage and
                 precision. CROWDGAME employs two groups of crowd
                 workers: one group answers rule validation tasks
                 (whether a rule is valid) to play a role of rule
                 generator, while the other group answers tuple checking
                 tasks (whether the annotated label of a data tuple is
                 correct) to play a role of rule refuter. We let the two
                 groups play a two-player game: rule generator
                 identifies high-quality rules with large coverage and
                 precision, while rule refuter tries to refute its
                 opponent rule generator by checking some tuples that
                 provide enough evidence to reject rules covering the
                 tuples. This paper studies the challenges in CROWDGAME.
                 The first is to balance the trade-off between coverage
                 and precision. We define the loss of a rule by
                 considering the two factors. The second is rule
                 precision estimation. We utilize Bayesian estimation to
                 combine both rule validation and tuple checking tasks.
                 The third is to select crowdsourcing tasks to fulfill
                 the game-based framework for minimizing the loss. We
                 introduce a minimax strategy and develop efficient task
                 selection algorithms. We conduct experiments on entity
                 matching and relation extraction, and the results show
                 that our method outperforms state-of-the-art
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2018:OAL,
  author =       "Enhui Huang and Liping Peng and Luciano {Di Palma} and
                 Ahmed Abdelkafi and Anna Liu and Yanlei Diao",
  title =        "Optimization for active learning-based interactive
                 database exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "1",
  pages =        "71--84",
  month =        sep,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3275536.3275542",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:47 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is an increasing gap between fast growth of data
                 and limited human ability to comprehend data.
                 Consequently, there has been a growing demand of data
                 management tools that can bridge this gap and help the
                 user retrieve high-value content from data more
                 effectively. In this work, we aim to build interactive
                 data exploration as a new database service, using an
                 approach called ``explore-by-example''. In particular,
                 we cast the explore-by-example problem in a principled
                 ``active learning'' framework, and bring the properties
                 of important classes of database queries to bear on the
                 design of new algorithms and optimizations for active
                 learning-based database exploration. These new
                 techniques allow the database system to overcome a
                 fundamental limitation of traditional active learning,
                 i.e., the slow convergence problem. Evaluation results
                 using real-world datasets and user interest patterns
                 show that our new system significantly outperforms
                 state-of-the-art active learning techniques and data
                 exploration systems in accuracy while achieving desired
                 efficiency for interactive performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bleifuss:2018:ECN,
  author =       "Tobias Bleifu{\ss} and Leon Bornemann and Theodore
                 Johnson and Dmitri V. Kalashnikov and Felix Naumann and
                 Divesh Srivastava",
  title =        "Exploring change: a new dimension of data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "2",
  pages =        "85--98",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3282495.3282496",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:48 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data and metadata in datasets experience many
                 different kinds of change. Values are inserted, deleted
                 or updated; rows appear and disappear; columns are
                 added or repurposed, etc. In such a dynamic situation,
                 users might have many questions related to changes in
                 the dataset, for instance which parts of the data are
                 trustworthy and which are not? Users will wonder: How
                 many changes have there been in the recent minutes,
                 days or years? What kind of changes were made at which
                 points of time? How dirty is the data? Is data
                 cleansing required? The fact that data changed can hint
                 at different hidden processes or agendas: a frequently
                 crowd-updated city name may be controversial; a person
                 whose name has been recently changed may be the target
                 of vandalism; and so on. We show various use cases that
                 benefit from recognizing and exploring such change. We
                 envision a system and methods to interactively explore
                 such change, addressing the variability dimension of
                 big data challenges. To this end, we propose a model to
                 capture change and the process of exploring dynamic
                 data to identify salient changes. We provide
                 exploration primitives along with motivational examples
                 and measures for the volatility of data. We identify
                 technical challenges that need to be addressed to make
                 our vision a reality, and propose directions of future
                 work for the data management community.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ghosh:2018:FSS,
  author =       "Bishwamittra Ghosh and Mohammed Eunus Ali and Farhana
                 M. Choudhury and Sajid Hasan Apon and Timos Sellis and
                 Jianxin Li",
  title =        "The flexible socio spatial group queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "2",
  pages =        "99--111",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3282495.3282497",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:48 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A socio spatial group query finds a group of users who
                 possess strong social connections with each other and
                 have the minimum aggregate spatial distance to a
                 meeting point. Existing studies limit to either finding
                 the best group of a fixed size for a single meeting
                 location, or a single group of a fixed size w.r.t.
                 multiple locations. However, it is highly desirable to
                 consider multiple locations in a real-life scenario in
                 order to organize impromptu activities of groups of
                 various sizes. In this paper, we propose Top k Flexible
                 Socio Spatial Group Query (Top k-FSSGQ) to find the top
                 k groups w.r.t. multiple POIs where each group follows
                 the minimum social connectivity constraints. We devise
                 a ranking function to measure the group score by
                 combining social closeness, spatial distance, and group
                 size, which provides the flexibility of choosing groups
                 of different sizes under different constraints. To
                 effectively process the Top k-FSSGQ, we first develop
                 an Exact approach that ensures early termination of the
                 search based on the derived upper bounds. We prove that
                 the problem is NP-hard, hence we first present a
                 heuristic based approximation algorithm to effectively
                 select members in intermediate solution groups based on
                 the social connectivity of the users. Later we design a
                 Fast Approximate approach based on the relaxed social
                 and spatial bounds, and connectivity constraint
                 heuristic. Experimental studies have verified the
                 effectiveness and efficiency of our proposed approaches
                 on real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Echihabi:2018:LHD,
  author =       "Karima Echihabi and Kostas Zoumpatianos and Themis
                 Palpanas and Houda Benbrahim",
  title =        "The {Lernaean Hydra} of data series similarity search:
                 an experimental evaluation of the state of the art",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "2",
  pages =        "112--127",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3282495.3282498",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:48 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Increasingly large data series collections are
                 becoming commonplace across many different domains and
                 applications. A key operation in the analysis of data
                 series collections is similarity search, which has
                 attracted lots of attention and effort over the past
                 two decades. Even though several relevant approaches
                 have been proposed in the literature, none of the
                 existing studies provides a detailed evaluation against
                 the available alternatives. The lack of comparative
                 results is further exacerbated by the non-standard use
                 of terminology, which has led to confusion and
                 misconceptions. In this paper, we provide definitions
                 for the different flavors of similarity search that
                 have been studied in the past, and present the first
                 systematic experimental evaluation of the efficiency of
                 data series similarity search techniques. Based on the
                 experimental results, we describe the strengths and
                 weaknesses of each approach and give recommendations
                 for the best approach to use under typical use cases.
                 Finally, by identifying the shortcomings of each
                 method, our findings lay the ground for solid further
                 developments in the field.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2018:RML,
  author =       "Wei Wang and Jinyang Gao and Meihui Zhang and Sheng
                 Wang and Gang Chen and Teck Khim Ng and Beng Chin Ooi
                 and Jie Shao and Moaz Reyad",
  title =        "{Rafiki}: machine learning as an analytics service
                 system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "2",
  pages =        "128--140",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3282495.3282499",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:48 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big data analytics is gaining massive momentum in the
                 last few years. Applying machine learning models to big
                 data has become an implicit requirement or an
                 expectation for most analysis tasks, especially on
                 high-stakes applications. Typical applications include
                 sentiment analysis against reviews for analyzing
                 on-line products, image classification in food logging
                 applications for monitoring user's daily intake, and
                 stock movement prediction. Extending traditional
                 database systems to support the above analysis is
                 intriguing but challenging. First, it is almost
                 impossible to implement all machine learning models in
                 the database engines. Second, expert knowledge is
                 required to optimize the training and inference
                 procedures in terms of efficiency and effectiveness,
                 which imposes heavy burden on the system users. In this
                 paper, we develop and present a system, called Rafiki,
                 to provide the training and inference service of
                 machine learning models. Rafiki provides distributed
                 hyper-parameter tuning for the training service, and
                 online ensemble modeling for the inference service
                 which trades off between latency and accuracy.
                 Experimental results confirm the efficiency,
                 effectiveness, scalability and usability of Rafiki.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Subotic:2018:AIS,
  author =       "Pavle Suboti{\'c} and Herbert Jordan and Lijun Chang
                 and Alan Fekete and Bernhard Scholz",
  title =        "Automatic index selection for large-scale datalog
                 computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "2",
  pages =        "141--153",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3282495.3282500",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:48 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Datalog has been applied to several use cases that
                 require very high performance on large rulesets and
                 factsets. It is common to create indexes for relations
                 to improve search performance. However, the existing
                 indexing schemes either require manual index selection
                 or result in insufficient performance on very large
                 tasks. In this paper, we propose an automatic scheme to
                 select indexes. We automatically create the minimum
                 number of indexes to speed up all the searches in a
                 given Datalog program. We have integrated our indexing
                  scheme into an open-source Datalog engine Souffl{\'e}.
                 We obtain performance on a par with what users have
                 accepted from hand-optimized Datalog programs running
                 on state-of-the-art Datalog engines, while we do not
                 require the effort of manual index selection. Extensive
                 experiments on large real Datalog programs demonstrate
                 that our indexing scheme results in considerable
                 speedups (up to 2x) and significantly less memory usage
                 (up to 6x) compared with other automated index
                 selections.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Song:2018:SLF,
  author =       "Shuang Song and Xu Liu and Qinzhe Wu and Andreas
                 Gerstlauer and Tao Li and Lizy K. John",
  title =        "Start late, finish early: a distributed graph
                 processing system with redundancy reduction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "2",
  pages =        "154--168",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3282495.3282501",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:48 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph processing systems are important in the big data
                 domain. However, processing graphs in parallel often
                 introduces redundant computations in existing
                 algorithms and models. Prior work has proposed
                 techniques to optimize redundancies for out-of-core
                 graph systems, rather than distributed graph systems.
                 In this paper, we study various state-of-the-art
                 distributed graph systems and observe root causes for
                 these pervasively existing redundancies. To reduce
                 redundancies without sacrificing parallelism, we
                 further propose SLFE, a distributed graph processing
                 system, designed with the principle of ``start late or
                 finish early''. SLFE employs a novel preprocessing
                 stage to obtain a graph's topological knowledge with
                 negligible overhead. SLFE's redundancy-aware
                 vertex-centric computation model can then utilize such
                 knowledge to reduce the redundant computations at
                 runtime. SLFE also provides a set of APIs to improve
                 programmability. Our experiments on an 8-machine
                 high-performance cluster show that SLFE outperforms all
                 well-known distributed graph processing systems with
                 the inputs of real-world graphs, yielding up to 75x
                 speedup. Moreover, SLFE outperforms two
                 state-of-the-art shared memory graph systems on a
                 high-end machine with up to 1644x speedup. SLFE's
                 redundancy-reduction schemes are generally applicable
                 to other vertex-centric graph processing systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2018:IOC,
  author =       "Bailu Ding and Lucja Kot and Johannes Gehrke",
  title =        "Improving optimistic concurrency control through
                 transaction batching and operation reordering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "2",
  pages =        "169--182",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3282495.3282502",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 2 18:29:48 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "OLTP systems can often improve throughput by batching
                 transactions and processing them as a group. Batching
                 has been used for optimizations such as message packing
                 and group commits; however, there is little research on
                 the benefits of a holistic approach to batching across
                 a transaction's entire life cycle. In this paper, we
                 present a framework to incorporate batching at multiple
                 stages of transaction execution for OLTP systems based
                 on optimistic concurrency control. Storage batching
                 enables reordering of transaction reads and writes at
                 the storage layer, reducing conflicts on the same
                 object. Validator batching enables reordering of
                 transactions before validation, reducing conflicts
                 between transactions. Dependencies between transactions
                 make transaction reordering a non-trivial problem, and
                 we propose several efficient and practical algorithms
                 that can be customized to various transaction
                 precedence policies such as reducing tail latency. We
                 also show how to reorder transactions with a
                 thread-aware policy in multi-threaded OLTP architecture
                 without a centralized validator. In-depth experiments
                  on a research prototype, an open-source OLTP system, and
                 a production OLTP system show that our techniques
                 increase transaction throughput by up to 2.2x and
                 reduce their tail latency by up to 71\% compared with
                  the state-of-the-art systems on workloads with high
                 data contention.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xie:2018:QLC,
  author =       "Ting Xie and Varun Chandola and Oliver Kennedy",
  title =        "Query log compression for workload analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "183--196",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291265",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Analyzing database access logs is a key part of
                 performance tuning, intrusion detection, benchmark
                 development, and many other database administration
                 tasks. Unfortunately, it is common for production
                 databases to deal with millions or more queries each
                 day, so these logs must be summarized before they can
                 be used. Designing an appropriate summary encoding
                 requires trading off between conciseness and
                 information content. For example: simple workload
                 sampling may miss rare, but high impact queries. In
                  this paper, we present LOGR, a lossy log compression
                 scheme suitable for use in many automated log analytics
                 tools, as well as for human inspection. We formalize
                 and analyze the space/fidelity trade-off in the context
                 of a broader family of ``pattern'' and ``pattern
                 mixture'' log encodings to which LOGR belongs. We show
                 through a series of experiments that LOGR compressed
                 encodings can be created efficiently, come with
                 provable information-theoretic bounds on their
                 accuracy, and outperform state-of-art log summarization
                 strategies.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ali:2018:MTC,
  author =       "Mohammed Eunus Ali and Shadman Saqib Eusuf and Kaysar
                 Abdullah and Farhana M. Choudhury and J. Shane
                 Culpepper and Timos Sellis",
  title =        "The maximum trajectory coverage query in spatial
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "197--209",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291266",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the widespread use of GPS-enabled mobile devices,
                 an unprecedented amount of trajectory data has become
                 available from various sources such as Bikely,
                 GPS-wayPoints, and Uber. The rise of smart
                 transportation services and recent break-throughs in
                 autonomous vehicles increase our reliance on trajectory
                 data in a wide variety of applications. Supporting
                 these services in emerging platforms requires more
                 efficient query processing in trajectory databases. In
                 this paper, we propose two new coverage queries for
                  trajectory databases: (i) $k$ Best Facility Trajectory
                  Search ($k$BFT); and (ii) $k$ Best Coverage Facility
                  Trajectory Search ($k$BCovFT). We propose a novel index
                 structure, the Trajectory Quadtree (TQ-tree) that
                 utilizes a quadtree to hierarchically organize
                 trajectories into different nodes, and then applies a
                 z-ordering to further organize the trajectories by
                 spatial locality inside each node. This structure is
                 highly effective in pruning the trajectory search
                 space, which is of independent interest. By exploiting
                 the TQ-tree, we develop a divide-and-conquer approach
                  to efficiently process a $k$BFT query. To solve the
                  $k$BCovFT, which is a non-submodular NP-hard problem, we
                 propose a greedy approximation. We evaluate our
                 algorithms through an extensive experimental study on
                 several real datasets, and demonstrate that our
                 algorithms outperform baselines by two to three orders
                 of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2018:TLO,
  author =       "Chenggang Wu and Alekh Jindal and Saeed Amizadeh and
                 Hiren Patel and Wangchao Le and Shi Qiao and Sriram
                 Rao",
  title =        "Towards a learning optimizer for shared clouds",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "210--222",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291267",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query optimizers are notorious for inaccurate cost
                 estimates, leading to poor performance. The root of the
                 problem lies in inaccurate cardinality estimates, i.e.,
                 the size of intermediate (and final) results in a query
                 plan. These estimates also determine the resources
                 consumed in modern shared cloud infrastructures. In
                  this paper, we present CardLearner, a machine learning
                 based approach to learn cardinality models from
                 previous job executions and use them to predict the
                 cardinalities in future jobs. The key intuition in our
                 approach is that shared cloud workloads are often
                 recurring and overlapping in nature, and so we could
                 learn cardinality models for overlapping subgraph
                 templates. We discuss various learning approaches and
                 show how learning a large number of smaller models
                 results in high accuracy and explainability. We further
                 present an exploration technique to avoid learning bias
                 by considering alternate join orders and learning
                 cardinality models over them. We describe the feedback
                 loop to apply the learned models back to future job
                 executions. Finally, we show a detailed evaluation of
                 our models (up to 5 orders of magnitude less error),
                 query plans (60\% applicability), performance (up to
                 100\% faster, 3x fewer resources), and exploration
                 (optimal in few 10s of executions).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Varma:2018:SAW,
  author =       "Paroma Varma and Christopher R{\'e}",
  title =        "{Snuba}: automating weak supervision to label training
                 data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "223--236",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291268",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As deep learning models are applied to increasingly
                 diverse problems, a key bottleneck is gathering enough
                 high-quality training labels tailored to each task.
                 Users therefore turn to weak supervision, relying on
                 imperfect sources of labels like pattern matching and
                 user-defined heuristics. Unfortunately, users have to
                 design these sources for each task. This process can be
                 time consuming and expensive: domain experts often
                 perform repetitive steps like guessing optimal
                 numerical thresholds and developing informative text
                 patterns. To address these challenges, we present
                 Snuba, a system to automatically generate heuristics
                 using a small labeled dataset to assign training labels
                 to a large, unlabeled dataset in the weak supervision
                 setting. Snuba generates heuristics that each labels
                 the subset of the data it is accurate for, and
                 iteratively repeats this process until the heuristics
                 together label a large portion of the unlabeled data.
                 We develop a statistical measure that guarantees the
                 iterative process will automatically terminate before
                 it degrades training label quality. Snuba automatically
                 generates heuristics in under five minutes and performs
                 up to 9.74 F1 points better than the best known
                 user-defined heuristics developed over many days. In
                 collaborations with users at research labs, Stanford
                 Hospital, and on open source datasets, Snuba
                 outperforms other automated approaches like
                 semi-supervised learning by up to 14.35 F1 points.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Asudeh:2018:OSR,
  author =       "Abolfazl Asudeh and H. V. Jagadish and Gerome Miklau
                 and Julia Stoyanovich",
  title =        "On obtaining stable rankings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "237--250",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291269",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Decision making is challenging when there is more than
                 one criterion to consider. In such cases, it is common
                 to assign a goodness score to each item as a weighted
                 sum of its attribute values and rank them accordingly.
                 Clearly, the ranking obtained depends on the weights
                 used for this summation. Ideally, one would want the
                 ranked order not to change if the weights are changed
                 slightly. We call this property stability of the
                 ranking. A consumer of a ranked list may trust the
                 ranking more if it has high stability. A producer of a
                 ranked list prefers to choose weights that result in a
                 stable ranking, both to earn the trust of potential
                 consumers and because a stable ranking is intrinsically
                 likely to be more meaningful. In this paper, we develop
                 a framework that can be used to assess the stability of
                 a provided ranking and to obtain a stable ranking
                 within an ``acceptable'' range of weight values (called
                 ``the region of interest''). We address the case where
                 the user cares about the rank order of the entire set
                 of items, and also the case where the user cares only
                  about the top-$k$ items. Using a geometric
                 interpretation, we propose algorithms that produce
                 stable rankings. In addition to theoretical analyses,
                 we conduct extensive experiments on real datasets that
                 validate our proposal.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ji:2018:PTB,
  author =       "Shuping Ji and Hans-Arno Jacobsen",
  title =        "{PS}-tree-based efficient {Boolean} expression
                 matching for high-dimensional and dense workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "251--264",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291270",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Boolean expression matching is an important function
                 for many applications. However, existing solutions
                 still suffer from limitations when applied to
                 high-dimensional and dense workloads. To overcome these
                 limitations, in this paper, we design a data structure
                 called PS-Tree that can efficiently index subscriptions
                 in one dimension. By dividing predicates into disjoint
                 predicate spaces, PS-Tree achieves high matching
                 performance and good expressiveness. Based on PS-Tree,
                 we first propose a Boolean expression matching
                 algorithm PSTBloom. By efficiently filtering out a
                 large proportion of unmatching subscriptions, PSTBloom
                 achieves high matching performance, especially for
                 high-dimensional workloads. PSTBloom also achieves fast
                 index construction and a small memory footprint.
                 Compared with state-of-the-art methods, comprehensive
                 experiments show that PSTBloom reduces matching time,
                 index construction time and memory usage by up to 84\%,
                 78\% and 94\%, respectively. Although PSTBloom is
                 effective for many workload distributions, dense
                 workloads represent new challenges to PSTBloom and
                 other algorithms. To effectively handle dense
                 workloads, we further propose the PSTHash algorithm,
                 which divides subscriptions into disjoint
                 multidimensional predicate spaces. This organization
                 prunes partially matching subscriptions efficiently.
                 Comprehensive experiments on both synthetic and
                 real-world datasets show that PSTHash improves the
                 matching performance by up to 92\% for dense
                 workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yan:2018:SMR,
  author =       "Yizhou Yan and Lei Cao and Samuel Madden and Elke A.
                 Rundensteiner",
  title =        "{SWIFT}: mining representative patterns from large
                 event streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "265--277",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291271",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Event streams generated by smart devices common in
                 modern Internet of Things applications must be
                 continuously mined to monitor the behavior of the
                 underlying system. In this work, we propose a stream
                 pattern mining system for supporting online IoT
                 applications. First, to solve the pattern explosion
                 problem of existing stream pattern mining strategies,
                 we now design pattern semantics that continuously
                  produce a compact set of patterns that maximally
                 compresses the dynamic data streams, called MDL-based
                 Representative Patterns (MRP). We then design a
                 one-pass SWIFT approach that continuously mines the
                 up-to-date MRP pattern set for each stream window upon
                 the arrival or expiration of individual events. We show
                 that SWIFT is guaranteed to select the update operation
                 for each individual incoming event that leads to the
                 most compact encoding of the sequence in the current
                 window. We further enhance SWIFT to support batch
                 updates, called B-SWIFT. B-SWIFT adopts a lazy update
                 strategy that guarantees that only the minimal number
                 of operations are conducted to process an incoming
                 event batch for MRP pattern mining. Evaluation by our
                 industry lighting lab collaborator demonstrates that
                 SWIFT successfully solves their use cases and finds
                 more representative patterns than the alternative
                 approaches adapting the state-of-the-art static
                 representative pattern mining methods. Our experimental
                 study confirms that SWIFT outperforms the best existing
                 method up to 50\% in the compactness of produced
                 pattern encodings, while providing a 4 orders of
                 magnitude speedup.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{C:2018:SSS,
  author =       "Paul Suganthan G. C. and Adel Ardalan and AnHai Doan
                 and Aditya Akella",
  title =        "{Smurf}: self-service string matching using random
                 forests",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "278--291",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291272",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We argue that more attention should be devoted to
                 developing self-service string matching (SM) solutions,
                 which lay users can easily use. We show that Falcon, a
                 self-service entity matching (EM) solution, can be
                 applied to SM and is more accurate than current
                 self-service SM solutions. However, Falcon often asks
                 lay users to label many string pairs (e.g., 770-1050 in
                 our experiments). This is expensive, can significantly
                 compound labeling mistakes, and takes a long time. We
                 developed Smurf, a self-service SM solution that
                 reduces the labeling effort by 43-76\%, yet achieves
                 comparable F$_1$ accuracy. The key to make Smurf
                 possible is a novel solution to efficiently execute a
                 random forest (that Smurf learns via active learning
                 with the lay user) over two sets of strings. This
                 solution uses RDBMS-style plan optimization to reuse
                 computations across the trees in the forest. As such,
                 Smurf significantly advances self-service SM and raises
                 interesting future directions for self-service EM and
                 scalable random forest execution over structured
                 data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(3):292--306, Nov. 2018: GRASP, distribution-aware parallel aggregation scheduling.
@Article{Liu:2018:CSD,
  author =       "Feilong Liu and Ario Salmasi and Spyros Blanas and
                 Anastasios Sidiropoulos",
  title =        "Chasing similarity: distribution-aware aggregation
                 scheduling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "292--306",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291273",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Parallel aggregation is a ubiquitous operation in data
                 analytics that is expressed as GROUP BY in SQL, reduce
                 in Hadoop, or segment in TensorFlow. Parallel
                 aggregation starts with an optional local
                 pre-aggregation step and then repartitions the
                 intermediate result across the network. While local
                 pre-aggregation works well for low-cardinality
                 aggregations, the network communication cost remains
                 significant for high-cardinality aggregations even
                 after local pre-aggregation. The problem is that the
                 repartition-based algorithm for high-cardinality
                 aggregation does not fully utilize the network. In this
                 work, we first formulate a mathematical model that
                 captures the performance of parallel aggregation. We
                 prove that finding optimal aggregation plans from a
                 known data distribution is NP-hard, assuming the Small
                 Set Expansion conjecture. We propose GRASP, a GReedy
                 Aggregation Scheduling Protocol that decomposes
                 parallel aggregation into phases. GRASP is
                 distribution-aware as it aggregates the most similar
                 partitions in each phase to reduce the transmitted data
                 size in subsequent phases. In addition, GRASP takes the
                 available network bandwidth into account when
                 scheduling aggregations in each phase to maximize
                 network utilization. The experimental evaluation on
                 real data shows that GRASP outperforms
                 repartition-based aggregation by 3.5x and LOOM by
                 2.0x.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(3):307--320, Nov. 2018: Shrinkwrap, differentially private SQL query processing in data federations.
@Article{Bater:2018:SES,
  author =       "Johes Bater and Xi He and William Ehrich and Ashwin
                 Machanavajjhala and Jennie Rogers",
  title =        "{Shrinkwrap}: efficient {SQL} query processing in
                 differentially private data federations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "3",
  pages =        "307--320",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3291264.3291274",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jan 18 05:54:04 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A private data federation is a set of autonomous
                 databases that share a unified query interface offering
                 in-situ evaluation of SQL queries over the union of the
                 sensitive data of its members. Owing to privacy
                 concerns, these systems do not have a trusted data
                 collector that can see all their data and their member
                 databases cannot learn about individual records of
                 other engines. Federations currently achieve this goal
                 by evaluating queries obliviously using secure
                 multiparty computation. This hides the intermediate
                 result cardinality of each query operator by
                 exhaustively padding it. With cascades of such
                 operators, this padding accumulates to a blow-up in the
                 output size of each operator and a proportional loss in
                 query performance. Hence, existing private data
                 federations do not scale well to complex SQL queries
                 over large datasets. We introduce Shrinkwrap, a private
                 data federation that offers data owners a
                 differentially private view of the data held by others
                 to improve their performance over oblivious query
                 processing. Shrinkwrap uses computational differential
                 privacy to minimize the padding of intermediate query
                 results, achieving up to a 35X performance improvement
                 over oblivious query processing. When the query needs
                 differentially private output, Shrinkwrap provides a
                 trade-off between result accuracy and query evaluation
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):321--334, Dec. 2018: experimental study of graph partitioning policies at scale (Gluon runtime, Cartesian Vertex-Cut).
@Article{Gill:2018:SPP,
  author =       "Gurbinder Gill and Roshan Dathathri and Loc Hoang and
                 Keshav Pingali",
  title =        "A study of partitioning policies for graph analytics
                 on large-scale distributed platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "321--334",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297754",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed-memory clusters are used for in-memory
                 processing of very large graphs with billions of nodes
                 and edges. This requires partitioning the graph among
                 the machines in the cluster. When a graph is
                 partitioned, a node in the graph may be replicated on
                 several machines, and communication is required to keep
                 these replicas synchronized. Good partitioning policies
                 attempt to reduce this synchronization overhead while
                 keeping the computational load balanced across
                 machines. A number of recent studies have looked at
                 ways to control replication of nodes, but these studies
                 are not conclusive because they were performed on small
                 clusters with eight to sixteen machines, did not
                 consider work-efficient data-driven algorithms, or did
                 not optimize communication for the partitioning
                 strategies they studied. This paper presents an
                 experimental study of partitioning strategies for
                 work-efficient graph analytics applications on large
                 KNL and Skylake clusters with up to 256 machines using
                 the Gluon communication runtime which implements
                 partitioning-specific communication optimizations.
                 Evaluation results show that although simple
                 partitioning strategies like Edge-Cuts perform well on
                 a small number of machines, an alternative partitioning
                 strategy called Cartesian Vertex-Cut (CVC) performs
                 better at scale even though paradoxically it has a
                 higher replication factor and performs more
                 communication than Edge-Cut partitioning does. Results
                 from communication micro-benchmarks resolve this
                 paradox by showing that communication overhead depends
                 not only on communication volume but also on the
                 communication pattern among the partitions. These
                 experiments suggest that high-performance graph
                 analytics systems should support multiple partitioning
                 strategies, like Gluon does, as no single graph
                 partitioning strategy is best for all cluster sizes.
                 For such systems, a decision tree for selecting a good
                 partitioning strategy based on characteristics of the
                 computation and the cluster is presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):335--347, Dec. 2018: utility-driven graph summarization under a user-specified utility threshold.
@Article{Kumar:2018:UDG,
  author =       "K. Ashwin Kumar and Petros Efstathopoulos",
  title =        "Utility-driven graph summarization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "335--347",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297755",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A lot of the large datasets analyzed today represent
                 graphs. In many real-world applications, summarizing
                 large graphs is beneficial (or necessary) so as to
                 reduce a graph's size and, thus, achieve a number of
                 benefits, including but not limited to (1) significant
                 speed-up for graph algorithms, (2) graph storage space
                 reduction, (3) faster network transmission, (4)
                 improved data privacy, (5) more effective graph
                 visualization, etc. During the summarization process,
                 potentially useful information is removed from the
                 graph (nodes and edges are removed or transformed).
                 Consequently, one important problem with graph
                 summarization is that, although it reduces the size of
                 the input graph, it also adversely affects and reduces
                 its utility. The key question that we pose in this
                 paper is, can we summarize and compress a graph while
                 ensuring that its utility or usefulness does not drop
                 below a certain user-specified utility threshold? We
                 explore this question and propose a novel iterative
                 utility-driven graph summarization approach. During
                 iterative summarization, we incrementally keep track of
                 the utility of the graph summary. This enables a user
                 to query a graph summary that is conditioned on a
                 user-specified utility value. We present both
                 exhaustive and scalable approaches for implementing our
                 proposed solution. Our experimental results on
                 real-world graph datasets show the effectiveness of our
                 proposed approach. Finally, through multiple real-world
                 applications we demonstrate the practicality of our
                 notion of utility of the computed graph summary.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):348--361, Dec. 2018: ColumnML, in-DBMS machine learning on column stores with CPU+FPGA on-the-fly data transformation.
@Article{Kara:2018:CCS,
  author =       "Kaan Kara and Ken Eguro and Ce Zhang and Gustavo
                 Alonso",
  title =        "{ColumnML}: column-store machine learning with
                 on-the-fly data transformation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "348--361",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297756",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ability to perform machine learning (ML) tasks in
                 a database management system (DBMS) provides the data
                 analyst with a powerful tool. Unfortunately,
                 integration of ML into a DBMS is challenging for
                 reasons varying from differences in execution model to
                 data layout requirements. In this paper, we assume a
                 column-store main-memory DBMS, optimized for online
                 analytical processing, as our initial system. On this
                 system, we explore the integration of
                 coordinate-descent based methods working natively on
                 columnar format to train generalized linear models. We
                 use a cache-efficient, partitioned stochastic
                 coordinate descent algorithm providing linear
                 throughput scalability with the number of cores while
                 preserving convergence quality, up to 14 cores in our
                 experiments. Existing column oriented DBMS rely on
                 compression and even encryption to store data in
                 memory. When those features are considered, the
                 performance of a CPU based solution suffers. Thus, in
                 the paper we also show how to exploit hardware
                 acceleration as part of a hybrid CPU+FPGA system to
                 provide on-the-fly data transformation combined with an
                 FPGA-based coordinate-descent engine. The resulting
                 system is a column-store DBMS with its important
                 features preserved (e.g., data compression) that offers
                 high performance machine learning capabilities.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):362--375, Dec. 2018: DANCE, cost-efficient data acquisition on online data marketplaces for correlation analysis.
@Article{Li:2018:CED,
  author =       "Yanying Li and Haipei Sun and Boxiang Dong and Hui
                 (Wendy) Wang",
  title =        "Cost-efficient data acquisition on online data
                 marketplaces for correlation analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "362--375",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297757",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Incentivized by the enormous economic profits, the
                 data marketplace platform has been proliferated
                 recently. In this paper, we consider the data
                 marketplace setting where a data shopper would like to
                 buy data instances from the data marketplace for
                 correlation analysis of certain attributes. We assume
                 that the data in the marketplace is dirty and not free.
                 The goal is to find the data instances from a large
                 number of datasets in the marketplace whose join result
                 not only is of high-quality and rich join
                 informativeness, but also delivers the best correlation
                 between the requested attributes. To achieve this goal,
                 we design DANCE, a middleware that provides the desired
                 data acquisition service. DANCE consists of two phases:
                 (1) In the off-line phase, it constructs a two-layer
                 join graph from samples. The join graph includes the
                 information of the datasets in the marketplace at both
                 schema and instance levels; (2) In the online phase, it
                 searches for the data instances that satisfy the
                 constraints of data quality, budget, and join
                 informativeness, while maximizing the correlation of
                 source and target attribute sets. We prove that the
                 complexity of the search problem is NP-hard, and design
                 a heuristic algorithm based on Markov chain Monte Carlo
                 (MCMC). Experiment results on two benchmark and one
                 real datasets demonstrate the efficiency and
                 effectiveness of our heuristic data acquisition
                 algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):376--389, Dec. 2018: TARS, oracle-based cleaning of noisy crowdsourced labels for classification.
@Article{Dolatshah:2018:CCL,
  author =       "Mohamad Dolatshah and Mathew Teoh and Jiannan Wang and
                 Jian Pei",
  title =        "Cleaning crowdsourced labels using oracles for
                 statistical classification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "376--389",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297758",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Nowadays, crowdsourcing is being widely used to
                 collect training data for solving classification
                 problems. However, crowdsourced labels are often noisy,
                 and there is a performance gap between classification
                 with noisy labels and classification with ground-truth
                 labels. In this paper, we consider how to apply
                 oracle-based label cleaning to reduce the gap. We
                 propose TARS, a label-cleaning advisor that can provide
                 two pieces of valuable advice for data scientists when
                 they need to train or test a model using noisy labels.
                 Firstly, in the model testing stage, given a test
                 dataset with noisy labels, and a classification model,
                 TARS can use the test data to estimate how well the
                 model will perform w.r.t. ground-truth labels.
                 Secondly, in the model training stage, given a training
                 dataset with noisy labels, and a classification
                 algorithm, TARS can determine which label should be
                 sent to an oracle to clean such that the model can be
                 improved the most. For the first advice, we propose an
                 effective estimation technique, and study how to
                 compute confidence intervals to bound its estimation
                 error. For the second advice, we propose a novel
                 cleaning strategy along with two optimization
                 techniques, and illustrate that it is superior to the
                 existing cleaning strategies. We evaluate TARS on both
                 simulated and real-world datasets. The results show
                 that (1) TARS can use noisy test data to accurately
                 estimate a model's true performance for various
                 evaluation metrics; and (2) TARS can improve the model
                 accuracy by a larger margin than the existing cleaning
                 strategies, for the same cleaning budget.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):390--403, Dec. 2018: microbenchmark-based evaluation of graph database systems.
%% Fix: removed stray footnote marker "$^1$" that leaked from the source PDF
%% into the end of the abstract (no corresponding footnote text exists here).
@Article{Lissandrini:2018:BMM,
  author =       "Matteo Lissandrini and Martin Brugnara and Yannis
                 Velegrakis",
  title =        "Beyond macrobenchmarks: microbenchmark-based graph
                 database evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "390--403",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297759",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Despite the increasing interest in graph databases
                 their requirements and specifications are not yet fully
                 understood by everyone, leading to a great deal of
                 variation in the supported functionalities and the
                 achieved performances. In this work, we provide a
                 comprehensive study of the existing graph database
                 systems. We introduce a novel microbenchmarking
                 framework that provides insights on their performance
                 that go beyond what macro-benchmarks can offer. The
                 framework includes the largest set of queries and
                 operators so far considered. The graph database systems
                 are evaluated on synthetic and real data, from
                 different domains, and at scales much larger than any
                 previous work. The framework is materialized as an
                 open-source suite and is easily extended to new
                 datasets, systems, and queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):404--418, Dec. 2018: IPA, invariant-preserving applications for weakly consistent replicated databases.
@Article{Balegas:2018:IIP,
  author =       "Valter Balegas and S{\'e}rgio Duarte and Carla
                 Ferreira and Rodrigo Rodrigues and Nuno
                 Pregui{\c{c}}a",
  title =        "{IPA}: invariant-preserving applications for weakly
                 consistent replicated databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "404--418",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297760",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "It is common to use weakly consistent replication to
                 achieve high availability and low latency at a global
                 scale. In this setting, concurrent updates may lead to
                 states where application invariants do not hold. Some
                 systems coordinate the execution of (conflicting)
                 operations to avoid invariant violations, leading to
                 high latency and reduced availability for those
                 operations. This problem is worsened by the difficulty
                 in identifying precisely which operations conflict. In
                 this paper we propose a novel approach to preserve
                 application invariants without coordinating the
                 execution of operations. The approach consists of
                 modifying operations in a way that application
                 invariants are maintained in the presence of concurrent
                 updates. When no conflicting updates occur, the
                 modified operations present their original semantics.
                 Otherwise, we use sensible and deterministic conflict
                 resolution policies that preserve the invariants of the
                 application. To implement this approach, we developed a
                 static analysis, IPA, that identifies conflicting
                 operations and proposes the necessary modifications to
                 operations. Our analysis shows that IPA can avoid
                 invariant violations in many applications, including
                 typical database applications. Our evaluation reveals
                 that the offline static analysis runs fast enough for
                 being used with large applications. The overhead
                 introduced in the modified operations is low and it
                 leads to lower latency and higher throughput when
                 compared with other approaches that enforce
                 invariants.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):419--432, Dec. 2018: DIFF, a relational aggregation operator for large-scale data explanation (MB SQL / MacroBase).
@Article{Abuzaid:2018:DRI,
  author =       "Firas Abuzaid and Peter Kraft and Sahaana Suri and
                 Edward Gan and Eric Xu and Atul Shenoy and Asvin
                 Ananthanarayan and John Sheu and Erik Meijer and Xi Wu
                 and Jeff Naughton and Peter Bailis and Matei Zaharia",
  title =        "{DIFF}: a relational interface for large-scale data
                 explanation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "419--432",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297761",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A range of explanation engines assist data analysts by
                 performing feature selection over increasingly
                 high-volume and high-dimensional data, grouping and
                 highlighting commonalities among data points. While
                 useful in diverse tasks such as user behavior
                 analytics, operational event processing, and root cause
                 analysis, today's explanation engines are designed as
                 standalone data processing tools that do not
                 interoperate with traditional, SQL-based analytics
                 workflows; this limits the applicability and
                 extensibility of these engines. In response, we propose
                 the DIFF operator, a relational aggregation operator
                 that unifies the core functionality of these engines
                 with declarative relational query processing. We
                 implement both single-node and distributed versions of
                 the DIFF operator in MB SQL, an extension of MacroBase,
                 and demonstrate how DIFF can provide the same semantics
                 as existing explanation engines while capturing a broad
                 set of production use cases in industry, including at
                 Microsoft and Facebook. Additionally, we illustrate how
                 this declarative approach to data explanation enables
                 new logical and physical query optimizations. We
                 evaluate these optimizations on several real-world
                 production applications, and find that DIFF in MB SQL
                 can outperform state-of-the-art engines by up to an
                 order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):433--445, Dec. 2018: stream frequency over interval queries (generalized sliding-window model).
%% NOTE(review): first author's braced surname is "{Ben Basat}"; the key uses
%% "Basat" only — left unchanged since keys must stay stable for citers.
@Article{Basat:2018:SFI,
  author =       "Ran {Ben Basat} and Roy Friedman and Rana Shahout",
  title =        "Stream frequency over interval queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "433--445",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297762",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Stream frequency measurements are fundamental in many
                 data stream applications such as financial data
                 trackers, intrusion-detection systems, and network
                 monitoring. Typically, recent data items are more
                 relevant than old ones, a notion we can capture through
                 a sliding window abstraction. This paper considers a
                 generalized sliding window model that supports stream
                 frequency queries over an interval given at query time.
                 This enables drill-down queries, in which we can
                 examine the behavior of the system in finer and finer
                 granularities. For this model, we asymptotically
                 improve the space bounds of existing work, reduce the
                 update and query time to a constant, and provide
                 deterministic solutions. When evaluated over real
                 Internet packet traces, our fastest algorithm processes
                 items 90--250 times faster, serves queries at least 730
                 times quicker and consumes at least 40\% less space
                 than the best known method.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%% PVLDB 12(4):446--460, Dec. 2018: HELIX, holistic optimization for iterative machine learning workflows.
%% Fixes: abstract text-extraction garble "We propose H / elix" rejoined to
%% "Helix" (spelling confirmed by "Helix captures" later in the abstract),
%% and mis-spaced em-dash "iterations ---intelligently" closed up to match
%% the "training---a ... time---and" usage earlier in the same abstract.
@Article{Xin:2018:HHO,
  author =       "Doris Xin and Stephen Macke and Litian Ma and Jialin
                 Liu and Shuchen Song and Aditya Parameswaran",
  title =        "{HELIX}: holistic optimization for accelerating
                 iterative machine learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "4",
  pages =        "446--460",
  month =        dec,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3297753.3297763",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine learning workflow development is a process of
                 trial-and-error: developers iterate on workflows by
                 testing out small modifications until the desired
                 accuracy is achieved. Unfortunately, existing machine
                 learning systems focus narrowly on model training---a
                 small fraction of the overall development time---and
                 neglect to address iterative development. We propose
                 Helix, a machine learning system that optimizes the
                 execution across iterations---intelligently caching
                 and reusing, or recomputing intermediates as
                 appropriate. Helix captures a wide variety of
                 application needs within its Scala DSL, with succinct
                 syntax defining unified processes for data
                 preprocessing, model specification, and learning. We
                 demonstrate that the reuse problem can be cast as a
                 Max-Flow problem, while the caching problem is NP-Hard.
                 We develop effective lightweight heuristics for the
                 latter. Empirical evaluation shows that Helix is not
                 only able to handle a wide variety of use cases in one
                 unified workflow but also much faster, providing run
                 time reductions of up to 19x over state-of-the-art
                 systems, such as DeepDive or KeystoneML, on four
                 real-world applications in natural language processing,
                 computer vision, social and natural sciences.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):461--474, January 2019.  Graph-based approximate
%%% nearest neighbor search: proposes the MRNG and NSG graph index
%%% structures; NSG deployed in Taobao's billion-scale search engine.
@Article{Fu:2019:FAN,
  author =       "Cong Fu and Chao Xiang and Changxu Wang and Deng Cai",
  title =        "Fast approximate nearest neighbor search with the
                 navigating spreading-out graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "461--474",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303754",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Approximate nearest neighbor search (ANNS) is a
                 fundamental problem in databases and data mining. A
                 scalable ANNS algorithm should be both memory-efficient
                 and fast. Some early graph-based approaches have shown
                 attractive theoretical guarantees on search time
                 complexity, but they all suffer from the problem of
                 high indexing time complexity. Recently, some
                 graph-based methods have been proposed to reduce
                 indexing complexity by approximating the traditional
                 graphs; these methods have achieved revolutionary
                 performance on million-scale datasets. Yet, they still
                 can not scale to billion-node databases. In this paper,
                 to further improve the search-efficiency and
                 scalability of graph-based methods, we start by
                 introducing four aspects: (1) ensuring the connectivity
                 of the graph; (2) lowering the average out-degree of
                 the graph for fast traversal; (3) shortening the search
                 path; and (4) reducing the index size. Then, we propose
                 a novel graph structure called Monotonic Relative
                 Neighborhood Graph (MRNG) which guarantees very low
                 search complexity (close to logarithmic time). To
                 further lower the indexing complexity and make it
                 practical for billion-node ANNS problems, we propose a
                 novel graph structure named Navigating Spreading-out
                 Graph (NSG) by approximating the MRNG. The NSG takes
                 the four aspects into account simultaneously. Extensive
                 experiments show that NSG outperforms all the existing
                 algorithms significantly. In addition, NSG shows
                 superior performance in the E-commercial scenario of
                 Taobao (Alibaba Group) and has been integrated into
                 their billion-scale search engine.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):475--487, January 2019.  Document-identifier
%%% reordering in inverted indexes optimized directly for
%%% intersection (query processing) speed rather than index size.
@Article{Wang:2019:DRF,
  author =       "Qi Wang and Torsten Suel",
  title =        "Document reordering for faster intersection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "475--487",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303755",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A lot of research has studied how to optimize inverted
                 index structures in search engines through suitable
                 reassignment of document identifiers. This approach was
                 originally proposed to allow for better compression of
                 the index, but subsequent work showed that it can also
                 result in significant speed-ups for conjunctive queries
                 and even certain types of disjunctive top-k algorithms.
                 However, we do not have a good understanding of why
                 this happens, and how we could directly optimize an
                 index for query processing speed. As a result, existing
                 techniques attempt to optimize for size, and treat
                 speed increases as a welcome side-effect. In this
                 paper, we take an initial but important step towards
                 understanding and modeling speed increases due to
                 document reordering. We define the problem of
                 minimizing the cost of queries given an inverted index
                 and a query distribution, relate it to work on adaptive
                 set intersection, and show that it is fundamentally
                 different from that of minimizing compressed index
                 size. We then propose a heuristic algorithm for finding
                 a document reordering that minimizes query processing
                 costs under suitable cost models. Our experiments show
                 significant increases in the speed of intersections
                 over state-of-the-art reordering techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):488--501, January 2019.  Shortest paths under
%%% correlation (necessity/denial) constraints on multi-relation
%%% graphs, using the HyRE hybrid relation encoding.
@Article{Zhang:2019:CCS,
  author =       "Xiaofei Zhang and M. Tamer {\"O}zsu",
  title =        "Correlation constraint shortest path over large
                 multi-relation graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "488--501",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303756",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Multi-relation graphs intuitively capture the
                 heterogeneous correlations among real-world entities by
                 allowing multiple types of relationships to be
                 represented as entity-connecting edges, i.e., two
                 entities could be correlated with more than one type of
                 relationship. This is important in various applications
                 such as social network analysis, ecology, and
                 bio-informatics. Existing studies on these graphs
                 usually consider an edge label constraint perspective,
                 where each edge contains only one label and each edge
                 is considered independently. For example, there are
                 lines of research focusing on reachability between two
                 vertices under a set of edge label constraints, or
                 finding paths whose consecutive edge labels satisfy a
                 user-specified logical expression. This is too
                 restricted in real graphs, and in this work, we define
                 a generic correlation constraint on multi-relation
                 graphs from the perspective of vertex correlations,
                 where a correlation can be defined recursively.
                 Specifically, we formalize and investigate the shortest
                 path problem over large multi-relation graphs in the
                 presence of both necessity and denial constraints,
                 which have various real applications. We show that it
                 is nontrivial to apply conventional graph traversal
                 algorithms (e.g., BFS or DFS) to address the challenge.
                 To effectively reduce the search space, we propose a
                 Hybrid Relation Encoding method, a.k.a. HyRE, to encode
                 both topological and relation information in a compact
                 way. We conduct extensive experiments over large
                 real-world graphs to validate the effectiveness and
                 efficiency of the proposed solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):502--515, January 2019.  Performance-optimal filter
%%% configuration; introduces register-blocked and cache-sectorized
%%% Bloom filters and compares them with Cuckoo filters.
%%% NOTE(review): restored the dropped words ``of millions'' in the
%%% abstract phrase ``rates of millions to hundreds of millions per
%%% second'' (the truncated text described a decreasing range).
@Article{Lang:2019:POF,
  author =       "Harald Lang and Thomas Neumann and Alfons Kemper and
                 Peter Boncz",
  title =        "Performance-optimal filtering: {Bloom} overtakes
                 {Cuckoo} at high throughput",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "502--515",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303757",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We define the concept of performance-optimal filtering
                 to indicate the Bloom or Cuckoo filter configuration
                 that best accelerates a particular task. While the
                 space-precision tradeoff of these filters has been well
                 studied, we show how to pick a filter that maximizes
                 the performance for a given workload. This choice might
                 be ``suboptimal'' relative to traditional
                 space-precision metrics, but it will lead to better
                 performance in practice. In this paper, we focus on
                 high-throughput filter use cases, aimed at avoiding CPU
                 work, e.g., a cache miss, a network message, or a local
                 disk I/O --- events that can happen at rates of
                 millions to hundreds of millions per second. Besides
                 the false-positive rate and memory footprint of the
                 filter, performance optimality has to take into account
                 the absolute cost of the filter lookup as well as the
                 saved work per lookup that filtering avoids; while the
                 actual rate of negative lookups in the workload
                 determines whether using a filter improves overall
                 performance at all. In the course of the paper, we
                 introduce new filter variants, namely the
                 register-blocked and cache-sectorized Bloom filters. We
                 present new implementation techniques and perform an
                 extensive evaluation on modern hardware platforms,
                 including the wide-SIMD Skylake-X and Knights Landing.
                 This experimentation shows that in high-throughput
                 situations, the lower lookup cost of blocked Bloom
                 filters allows them to overtake Cuckoo filters.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):516--530, January 2019.  Experimental analysis of
%%% stream processing engines on modern hardware; design changes
%%% yielding up to two orders of magnitude single-node speedup.
@Article{Zeuch:2019:AES,
  author =       "Steffen Zeuch and Bonaventura {Del Monte} and Jeyhun
                 Karimov and Clemens Lutz and Manuel Renz and Jonas
                 Traub and Sebastian Bre{\ss} and Tilmann Rabl and
                 Volker Markl",
  title =        "Analyzing efficient stream processing on modern
                 hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "516--530",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303758",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern Stream Processing Engines (SPEs) process large
                 data volumes under tight latency constraints. Many SPEs
                 execute processing pipelines using message passing on
                 shared-nothing architectures and apply a
                 partition-based scale-out strategy to handle
                 high-velocity input streams. Furthermore, many
                 state-of-the-art SPEs rely on a Java Virtual Machine to
                 achieve platform independence and speed up system
                 development by abstracting from the underlying
                 hardware. In this paper, we show that taking the
                 underlying hardware into account is essential to
                 exploit modern hardware efficiently. To this end, we
                 conduct an extensive experimental analysis of current
                 SPEs and SPE design alternatives optimized for modern
                 hardware. Our analysis highlights potential bottlenecks
                 and reveals that state-of-the-art SPEs are not capable
                 of fully exploiting current and emerging hardware
                 trends, such as multi-core processors and high-speed
                 networks. Based on our analysis, we describe a set of
                 design changes to the common architecture of SPEs to
                 scale-up on modern hardware. We show that the
                 single-node throughput can be increased by up to two
                 orders of magnitude compared to state-of-the-art SPEs
                 by applying specialized code generation, fusing
                 operators, batch-style parallelization strategies, and
                 optimized windowing. This speedup allows for deploying
                 typical streaming applications on a single or a few
                 nodes instead of large clusters.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):531--543, January 2019.  Data ingestion and query
%%% processing for LSM-based storage systems (secondary indexes,
%%% batched point lookups); evaluated in Apache AsterixDB.
@Article{Luo:2019:EDI,
  author =       "Chen Luo and Michael J. Carey",
  title =        "Efficient data ingestion and query processing for
                 {LSM}-based storage systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "531--543",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303759",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, the Log Structured Merge (LSM) tree
                 has been widely adopted by NoSQL and NewSQL systems for
                 its superior write performance. Despite its popularity,
                 however, most existing work has focused on LSM-based
                 key--value stores with only a single LSM-tree;
                 auxiliary structures, which are critical for supporting
                 ad-hoc queries, have received much less attention. In
                 this paper, we focus on efficient data ingestion and
                 query processing for general-purpose LSM-based storage
                 systems. We first propose and evaluate a series of
                 optimizations for efficient batched point lookups,
                 significantly improving the range of applicability of
                 LSM-based secondary indexes. We then present several
                 new and efficient maintenance strategies for LSM-based
                 storage systems. Finally, we have implemented and
                 experimentally evaluated the proposed techniques in the
                 context of the Apache AsterixDB system, and we present
                 the results here.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):544--556, January 2019.  HetExchange: encapsulating
%%% heterogeneous CPU--GPU parallelism in JIT-compiled query engines.
%%% NOTE(review): repaired two extraction artifacts in the abstract:
%%% hyphenation residue ``work-loads'' -> ``workloads'', and the lost
%%% em dash in ``HetExchange-a'' -> ``HetExchange --- a''.
@Article{Chrysogelos:2019:HEH,
  author =       "Periklis Chrysogelos and Manos Karpathiotakis and Raja
                 Appuswamy and Anastasia Ailamaki",
  title =        "{HetExchange}: encapsulating heterogeneous {CPU--GPU}
                 parallelism in {JIT} compiled engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "544--556",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303760",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern server hardware is increasingly heterogeneous
                 as hardware accelerators, such as GPUs, are used
                 together with multicore CPUs to meet the computational
                 demands of modern data analytics workloads.
                 Unfortunately, query parallelization techniques used by
                 analytical database engines are designed for
                 homogeneous multicore servers, where query plans are
                 parallelized across CPUs to process data stored in
                 cache coherent shared memory. Thus, these techniques
                 are unable to fully exploit available heterogeneous
                 hardware, where one needs to exploit task-parallelism
                 of CPUs and data-parallelism of GPUs for processing
                 data stored in a deep, non-cache-coherent memory
                 hierarchy with widely varying access latencies and
                 bandwidth. In this paper, we introduce HetExchange ---
                 a parallel query execution framework that encapsulates
                 the heterogeneous parallelism of modern
                 multi-CPU-multi-GPU servers and enables the
                 parallelization of (pre-)existing sequential relational
                 operators. In contrast to the interpreted nature of
                 traditional Exchange, HetExchange is designed to be
                 used in conjunction with JIT compiled engines in order
                 to allow a tight integration with the proposed
                 operators and generation of efficient code for
                 heterogeneous hardware. We validate the applicability
                 and efficiency of our design by building a prototype
                 that can operate over both CPUs and GPUs, and enables
                 its operators to be parallelism- and
                 data-location-agnostic. In doing so, we show that
                 efficiently exploiting CPU-GPU parallelism can provide
                 2.8x and 6.4x improvement in performance compared to
                 state-of-the-art CPU-based and GPU-based DBMS.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):557--569, January 2019.  Meta-mappings: generalizing
%%% schema mappings over meta-schemas so that previously defined
%%% mappings can be stored, searched, and reused.
@Article{Atzeni:2019:MMS,
  author =       "Paolo Atzeni and Luigi Bellomarini and Paolo Papotti
                 and Riccardo Torlone",
  title =        "Meta-mappings for schema mapping reuse",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "557--569",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303761",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The definition of mappings between heterogeneous
                 schemas is a critical activity of any database
                 application. Existing tools provide high level
                 interfaces for the discovery of correspondences between
                 elements of schemas, but schema mappings need to be
                 manually specified every time from scratch, even if the
                 scenario at hand is similar to one that has already
                 been addressed. The problem is that schema mappings are
                 precisely defined over a pair of schemas and cannot
                 directly be reused on different scenarios. We tackle
                 this challenge by generalizing schema mappings as
                 meta-mappings: formalisms that describe transformations
                 between generic data structures called meta-schemas. We
                 formally characterize schema mapping reuse and explain
                 how meta-mappings are able to: (i) capture enterprise
                 knowledge from previously defined schema mappings and
                 (ii) use this knowledge to suggest new mappings. We
                 develop techniques to infer meta-mappings from existing
                 mappings, to organize them into a searchable
                 repository, and to leverage the repository to propose
                 to users mappings suitable for their needs. We study
                 effectiveness and efficiency in an extensive evaluation
                 over real-world scenarios and show that our system can
                 infer, store, and search millions of meta-mappings in
                 seconds.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):570--583, January 2019.  Experimental evaluation of
%%% the Parallel, CMS, and G1 garbage collectors on four Spark big
%%% data applications, with tuning guidelines.
@Article{Xu:2019:EEG,
  author =       "Lijie Xu and Tian Guo and Wensheng Dou and Wei Wang
                 and Jun Wei",
  title =        "An experimental evaluation of garbage collectors on
                 big data applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "570--583",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303762",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Popular big data frameworks, ranging from Hadoop
                 MapReduce to Spark, rely on garbage-collected
                 languages, such as Java and Scala. Big data
                 applications are especially sensitive to the
                 effectiveness of garbage collection (i.e., GC), because
                 they usually process a large volume of data objects
                 that lead to heavy GC overhead. Lacking in-depth
                 understanding of GC performance has impeded performance
                 improvement in big data applications. In this paper, we
                 conduct the first comprehensive evaluation on three
                 popular garbage collectors, i.e., Parallel, CMS, and
                 G1, using four representative Spark applications. By
                 thoroughly investigating the correlation between these
                 big data applications' memory usage patterns and the
                 collectors' GC patterns, we obtain many findings about
                 GC inefficiencies. We further propose empirical
                 guidelines for application developers, and insightful
                 optimization strategies for designing big-data-friendly
                 garbage collectors.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):584--596, January 2019.  AOCC: adaptive optimistic
%%% concurrency control that selects a low-cost validation scheme at
%%% runtime for heterogeneous (point + predicate) workloads.
@Article{Guo:2019:AOC,
  author =       "Jinwei Guo and Peng Cai and Jiahao Wang and Weining
                 Qian and Aoying Zhou",
  title =        "Adaptive optimistic concurrency control for
                 heterogeneous workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "584--596",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303763",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Optimistic concurrency control (OCC) protocols
                 validate whether a transaction has conflicts with other
                 concurrent transactions after this transaction
                 completes its execution. In this work, we demonstrate
                 that the validation phase has a great influence on the
                 performance of modern in-memory database systems,
                 especially under heterogeneous workloads. The cost of
                 validating operations in a transaction is determined by
                 two main factors. The first factor is the operation
                 type. An OCC protocol would take much less cost on
                 validating a single-record read operation than
                 validating a key-range scan operation. The second
                 factor is the workload type. Existing schemes in OCC
                 variants for validating key-range scan perform
                 differently under various workloads. Although various
                 validation schemes share the same goal of guaranteeing
                 a transaction schedule to be serializable, there are
                 remarkable differences between the costs they
                 introduced. These observations motivate us to design an
                 optimistic concurrency control which can choose a
                 low-cost validation scheme at runtime, referred to as
                 adaptive optimistic concurrency control (AOCC). First,
                 at transaction-level granularity, AOCC can assign a
                 validation method to a transaction according to the
                 features of its operations. Furthermore, for each
                 operation in a transaction, the validation method is
                 selected according to not only the number of accessed
                 records but also the instant characteristics of
                 workloads. Experimental results show that AOCC has good
                 performance and scalability under heterogeneous
                 workloads mixed with point accesses and predicate
                 queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):597--610, January 2019.  MgCrab: live migration for
%%% deterministic database systems, exploiting determinism to keep
%%% source and destination nodes consistent at low cost.
@Article{Lin:2019:MTC,
  author =       "Yu-Shan Lin and Shao-Kan Pi and Meng-Kai Liao and
                 Ching Tsai and Aaron Elmore and Shan-Hung Wu",
  title =        "{MgCrab}: transaction crabbing for live migration in
                 deterministic database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "597--610",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303764",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent deterministic database systems have achieved
                 high scalability and high availability in distributed
                 environments given OLTP workloads. However, modern OLTP
                 applications usually have changing workloads or access
                 patterns, so how to make the resource provisioning
                 elastic to the changing workloads becomes an important
                 design goal for a deterministic database system. Live
                 migration, which moves the specified data from a source
                 machine to a destination node while continuously
                 serving the incoming transactions, is a key technique
                 required for the elasticity. In this paper, we present
                 MgCrab, a live migration technique for a deterministic
                 database system, that leverages the determinism to
                 maintain the consistency of data on the source and
                 destination nodes at very low cost during a migration
                 period. We implement MgCrab on an open-source database
                 system. Extensive experiments were conducted and the
                 results demonstrate the effectiveness of MgCrab.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 12(5):611--623, January 2019.  C\&C framework unifying
%%% consensus and atomic commitment; proposes the PAC and G-PAC
%%% protocols.  Errata exist; see the note field below.
@Article{Maiyya:2019:UCA,
  author =       "Sujaya Maiyya and Faisal Nawab and Divyakant Agrawal
                 and Amr {El Abbadi}",
  title =        "Unifying consensus and atomic commitment for effective
                 cloud data management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "5",
  pages =        "611--623",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3303753.3303765",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Feb 27 14:03:31 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See errata \cite{Maiyya:2021:EUC}.",
  abstract =     "Data storage in the Cloud needs to be scalable and
                 fault-tolerant. Atomic commitment protocols such as Two
                 Phase Commit (2PC) provide ACID guarantees for
                 transactional access to sharded data and help in
                 achieving scalability. Whereas consensus protocols such
                 as Paxos consistently replicate data across different
                 servers and provide fault tolerance. Cloud based
                 datacenters today typically treat the problems of
                 scalability and fault-tolerance disjointedly. In this
                 work, we propose a unification of these two different
                 paradigms into one framework called Consensus and
                 Commitment (C\&C) framework. The C\&C framework can
                 model existing and well known data management protocols
                 as well as propose new ones. We demonstrate the
                 advantages of the C\&C framework by developing a new
                 atomic commitment protocol, Paxos Atomic Commit (PAC),
                 which integrates commitment with recovery in a
                 Paxos-like manner. We also instantiate commit protocols
                 from the C\&C framework catered to different Cloud data
                 management techniques. In particular, we propose a
                 novel protocol, Generalized PAC (G-PAC) that integrates
                 atomic commitment and fault tolerance in a cloud
                 paradigm involving both sharding and replication of
                 data. We compare the performance of G-PAC with a
                 Spanner-like protocol, where 2PC is used at the logical
                 data level and Paxos is used for consistent replication
                 of logical data. The experimental results highlight the
                 benefits of combining consensus along with commitment
                 into a single integrated protocol.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2019:ATC,
  author =       "Chenggang Wu and Vikram Sreekanti and Joseph M.
                 Hellerstein",
  title =        "Autoscaling tiered cloud storage in {Anna}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "6",
  pages =        "624--638",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3311880.3311881",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we describe how we extended a
                 distributed key--value store called Anna into an
                 autoscaling, multi-tier service for the cloud. In its
                 extended form, Anna is designed to overcome the narrow
                 cost-performance limitations typical of current cloud
                 storage systems. We describe three key aspects of
                 Anna's new design: multi-master selective replication
                 of hot keys, a vertical tiering of storage layers with
                 different cost-performance tradeoffs, and horizontal
                 elasticity of each tier to add and remove nodes in
                 response to load dynamics. Anna's policy engine uses
                 these mechanisms to balance service-level objectives
                 around cost, latency and fault tolerance. Experimental
                 results explore the behavior of Anna's mechanisms and
                 policy, exhibiting orders of magnitude efficiency
                 improvements over both commodity cloud KVS services and
                 research systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dignos:2019:SST,
  author =       "Anton Dign{\"o}s and Boris Glavic and Xing Niu and
                 Michael B{\"o}hlen and Johann Gamper",
  title =        "Snapshot semantics for temporal multiset relations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "6",
  pages =        "639--652",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3311880.3311882",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Snapshot semantics is widely used for evaluating
                 queries over temporal data: temporal relations are seen
                 as sequences of snapshot relations, and queries are
                 evaluated at each snapshot. In this work, we
                 demonstrate that current approaches for snapshot
                 semantics over interval-timestamped multiset relations
                 are subject to two bugs regarding snapshot aggregation
                 and bag difference. We introduce a novel temporal data
                  model based on $K$-relations that overcomes these bugs
                 and prove it to correctly encode snapshot semantics.
                 Furthermore, we present an efficient implementation of
                 our model as a database middleware and demonstrate
                 experimentally that our approach is competitive with
                 native implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kwashie:2019:CEE,
  author =       "Selasi Kwashie and Lin Liu and Jixue Liu and Markus
                 Stumptner and Jiuyong Li and Lujing Yang",
  title =        "{Certus}: an effective entity resolution approach with
                 graph differential dependencies {(GDDs)}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "6",
  pages =        "653--666",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3311880.3311883",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity resolution (ER) is the problem of accurately
                 identifying multiple, differing, and possibly
                 contradicting representations of unique real-world
                 entities in data. It is a challenging and fundamental
                 task in data cleansing and data integration. In this
                 work, we propose graph differential dependencies (GDDs)
                 as an extension of the recently developed graph entity
                 dependencies (which are formal constraints for graph
                 data) to enable approximate matching of values.
                 Furthermore, we investigate a special discovery of GDDs
                 for ER by designing an algorithm for generating a
                 non-redundant set of GDDs in labelled data. Then, we
                 develop an effective ER technique, Certus, that employs
                 the learned GDDs for improving the accuracy of ER
                 results. We perform extensive empirical evaluation of
                 our proposals on five real-world ER benchmark datasets
                 and a proprietary database to test their effectiveness
                 and efficiency. The results from the experiments show
                 the discovery algorithm and Certus are efficient; and
                 more importantly, GDDs significantly improve the
                 precision of ER without considerable trade-off of
                 recall.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2019:EEA,
  author =       "Kai Han and Fei Gui and Xiaokui Xiao and Jing Tang and
                 Yuntian He and Zongmai Cao and He Huang",
  title =        "Efficient and effective algorithms for clustering
                 uncertain graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "6",
  pages =        "667--680",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3311880.3311884",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We consider the edge uncertainty in an undirected
                  graph and study the $k$-median (resp. $k$-center)
                  problems, where the goal is to partition the graph
                  nodes into $k$ clusters such that the average (resp.
                 minimum) connection probability between each node and
                 its cluster's center is maximized. We analyze the
                 hardness of these problems, and propose algorithms that
                 provide considerably improved approximation guarantees
                 than the existing studies do. Specifically, our
                  algorithms offer $(1 - 1/e)$-approximations for the
                  $k$-median problem and (OPT$_k^c$)-approximations for the
                  $k$-center problem, where OPT$_k^c$ is the optimal
                  objective function value for $k$-center. In addition, our
                 algorithms incorporate several non-trivial
                 optimizations that significantly enhance their
                 practical efficiency. Extensive experimental results
                 demonstrate that our algorithms considerably outperform
                 the existing methods on both computation efficiency and
                 the quality of clustering results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zou:2019:PMD,
  author =       "Jia Zou and Arun Iyengar and Chris Jermaine",
  title =        "{Pangea}: monolithic distributed storage for data
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "6",
  pages =        "681--694",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3311880.3311885",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Storage and memory systems for modern data analytics
                 are heavily layered, managing shared persistent data,
                 cached data, and nonshared execution data in separate
                 systems such as a distributed file system like HDFS, an
                 in-memory file system like Alluxio, and a computation
                 framework like Spark. Such layering introduces
                 significant performance and management costs. In this
                 paper we propose a single system called Pangea that can
                 manage all data---both intermediate and long-lived
                 data, and their buffer/caching, data placement
                 optimization, and failure recovery---all in one
                 monolithic distributed storage system, without any
                 layering. We present a detailed performance evaluation
                 of Pangea and show that its performance compares
                 favorably with several widely used layered systems such
                 as Spark.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2019:SMD,
  author =       "Zhiwei Fan and Jianqiao Zhu and Zuyu Zhang and Aws
                 Albarghouthi and Paraschos Koutris and Jignesh M.
                 Patel",
  title =        "Scaling-up in-memory datalog processing: observations
                 and techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "6",
  pages =        "695--708",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3311880.3311886",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recursive query processing has experienced a recent
                 resurgence, as a result of its use in many modern
                 application domains, including data integration, graph
                 analytics, security, program analysis, networking and
                 decision making. Due to the large volumes of data being
                 processed, several research efforts across multiple
                 communities have explored how to scale up recursive
                 queries, typically expressed in Datalog. Our experience
                 with these tools indicate that their performance does
                 not translate across domains---e.g., a tool designed
                 for large-scale graph analytics does not exhibit the
                 same performance on program-analysis tasks, and vice
                 versa. Starting from the above observation, we make the
                 following two contributions. First, we perform a
                 detailed experimental evaluation comparing a number of
                 state-of-the-art Datalog systems on a wide spectrum of
                 graph analytics and program-analysis tasks, and
                 summarize the pros and cons of existing techniques.
                 Second, we design and implement our own general-purpose
                 Datalog engine, called RecStep, on top of a parallel
                 single-node relational system. We outline the
                 techniques we applied on RecStep, as well as the
                 contribution of each technique to the overall
                 performance. Using RecStep as a baseline, we
                 demonstrate that it generally out-performs
                 state-of-the-art parallel Datalog engines on complex
                 and large-scale Datalog evaluation, by a 4-6X margin.
                 An additional insight from our work is that it is
                 possible to build a high-performance Datalog system on
                 top of a relational engine, an idea that has been
                 dismissed in past work.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Archer:2019:CAL,
  author =       "Aaron Archer and Kevin Aydin and Mohammad Hossein
                 Bateni and Vahab Mirrokni and Aaron Schild and Ray Yang
                 and Richard Zhuang",
  title =        "Cache-aware load balancing of data center
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "6",
  pages =        "709--723",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3311880.3311887",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Our deployment of cache-aware load balancing in the
                 Google web search backend reduced cache misses by $
                 \approx $0.5x, contributing to a double-digit
                 percentage increase in the throughput of our serving
                 clusters by relieving a bottleneck. This innovation has
                 benefited all production workloads since 2015, serving
                 billions of queries daily. A load balancer forwards
                 each query to one of several identical serving
                 replicas. The replica pulls each term's postings list
                 into RAM from flash, either locally or over the
                 network. Flash bandwidth is a critical bottleneck,
                 motivating an application-directed RAM cache on each
                 replica. Sending the same term reliably to the same
                 replica would increase the chance it hits cache, and
                 avoid polluting the other replicas' caches. However,
                 most queries contain multiple terms and we have to send
                 the whole query to one replica, so it is not possible
                 to achieve a perfect partitioning of terms to replicas.
                 We solve this via a voting scheme, whereby the load
                 balancer conducts a weighted vote by the terms in each
                 query, and sends the query to the winning replica. We
                 develop a multi-stage scalable algorithm to learn these
                 weights. We first construct a large-scale term-query
                 graph from logs and apply a distributed balanced graph
                 partitioning algorithm to cluster each term to a
                 preferred replica. This yields a good but simplistic
                 initial voting table, which we then iteratively refine
                 via cache simulation to capture feedback effects.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Borkowski:2019:MCR,
  author =       "Michael Borkowski and Christoph Hochreiner and Stefan
                 Schulte",
  title =        "Minimizing cost by reducing scaling operations in
                 distributed stream processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "724--737",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317316",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Elastic distributed stream processing systems are able
                 to dynamically adapt to changes in the workload. Often,
                 these systems react to the rate of incoming data, or to
                 the level of resource utilization, by scaling up or
                 down. The goal is to optimize the system's resource
                 usage, thereby reducing its operational cost. However,
                 such scaling operations consume resources on their own,
                 introducing a certain overhead of resource usage, and
                 therefore cost, for every scaling operation. In
                 addition, migrations caused by scaling operations
                 inevitably lead to brief processing gaps. Therefore, an
                 excessive number of scaling operations should be
                 avoided. We approach this problem by preventing
                 unnecessary scaling operations and over-compensating
                 reactions to short-term changes in the workload. This
                 allows to maintain elasticity, while also minimizing
                 the incurred overhead cost of scaling operations. To
                 achieve this, we use advanced filtering techniques from
                 the field of signal processing to pre-process raw
                 system measurements, thus mitigating superfluous
                 scaling operations. We perform a real-world testbed
                 evaluation verifying the effects, and provide a
                 break-even cost analysis to show the economic
                 feasibility of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2019:PPB,
  author =       "Yinjun Wu and Abdussalam Alawini and Daniel Deutch and
                 Tova Milo and Susan Davidson",
  title =        "{ProvCite}: provenance-based data citation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "738--751",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317317",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As research products expand to include structured
                 datasets, the challenge arises of how to automatically
                 generate citations to the results of arbitrary queries
                 against such datasets. Previous work explored this
                 problem in the context of conjunctive queries and views
                 using a Rewriting-Based Model (RBM). However, an
                 increasing number of scientific queries are aggregate,
                 e.g. statistical summaries of the underlying data, for
                 which the RBM cannot be easily extended. In this paper,
                 we show how a Provenance-Based Model (PBM) can be
                 leveraged to (1) generate citations to conjunctive as
                 well as aggregate queries and views; (2) associate
                 citations with individual result tuples to enable
                 arbitrary subsets of the result set to be cited
                 (fine-grained citations); and (3) be optimized to
                 return citations in acceptable time. Our implementation
                 of PBM in ProvCite shows that it not only handles a
                 larger class of queries and views than RBM, but can
                 outperform it when restricted to conjunctive views in
                 some cases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2019:DCF,
  author =       "Wenfei Fan and Ping Lu and Chao Tian and Jingren
                 Zhou",
  title =        "Deducing certain fixes to graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "752--765",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317318",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper proposes to deduce certain fixes to graphs
                  G based on data quality rules $\Sigma$ and ground truth
                  $\Gamma$ (i.e., validated attribute values and entity
                  matches). We fix errors detected by $\Sigma$ in G such
                  that the fixes are assured correct as long as $\Sigma$
                  and $\Gamma$ are correct. We deduce certain fixes in two
                 paradigms. (a) We interact with users and
                 ``incrementally'' fix errors online. Whenever users
                 pick a small set V$_0$ of nodes in G, we fix all errors
                 pertaining to V$_0$ and accumulate ground truth in the
                  process. (b) Based on accumulated $\Gamma$, we repair the
                 entire graph G offline; while this may not correct all
                 errors in G, all fixes are guaranteed certain. We
                 develop techniques for deducing certain fixes. (1) We
                 define data quality rules to support conditional
                 functional dependencies, recursively defined keys and
                 negative rules on graphs, such that we can deduce fixes
                 by combining data repairing and object identification.
                 (2) We show that deducing certain fixes is
                 Church--Rosser, i.e., the deduction converges at the
                 same fixes regardless of the order of rules applied.
                 (3) We establish the complexity of three fundamental
                 problems associated with certain fixes. (4) We provide
                 (parallel) algorithms for deducing certain fixes online
                 and offline, and guarantee to reduce running time when
                 given more processors. Using real-life and synthetic
                 data, we experimentally verify the effectiveness and
                 scalability of our methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ceccarello:2019:SCC,
  author =       "Matteo Ceccarello and Andrea Pietracaprina and Geppino
                 Pucci",
  title =        "Solving $k$-center clustering (with outliers) in
                 {MapReduce} and streaming, almost as accurately as
                 sequentially",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "766--778",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317319",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Center-based clustering is a fundamental primitive for
                 data analysis and becomes very challenging for large
                  datasets. In this paper, we focus on the popular
                  $k$-center variant which, given a set $S$ of points from
                  some metric space and a parameter $k < |S|$, requires to
                  identify a subset of $k$ centers in $S$ minimizing the
                 maximum distance of any point of S from its closest
                 center. A more general formulation, introduced to deal
                 with noisy datasets, features a further parameter z and
                 allows up to z points of S (outliers) to be disregarded
                 when computing the maximum distance from the centers.
                 We present coreset-based 2-round MapReduce algorithms
                 for the above two formulations of the problem, and a
                 1-pass Streaming algorithm for the case with outliers.
                  For any fixed $\epsilon > 0$, the algorithms
                  yield solutions whose approximation ratios are a mere
                  additive term $\epsilon$ away from those achievable
                 by the best known polynomial-time sequential
                 algorithms, a result that substantially improves upon
                 the state of the art. Our algorithms are rather simple
                 and adapt to the intrinsic complexity of the dataset,
                 captured by the doubling dimension D of the metric
                 space. Specifically, our analysis shows that the
                 algorithms become very space-efficient for the
                 important case of small (constant) D. These theoretical
                 results are complemented with a set of experiments on
                 real-world and synthetic datasets of up to over a
                 billion points, which show that our algorithms yield
                 better quality solutions over the state of the art
                 while featuring excellent scalability, and that they
                 also lend themselves to sequential implementations much
                 faster than existing ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2019:EED,
  author =       "Xiaolan Wang and Alexandra Meliou",
  title =        "{Explain$3$D}: explaining disagreements in disjoint
                 datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "779--792",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317320",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data plays an important role in applications, analytic
                 processes, and many aspects of human activity. As data
                 grows in size and complexity, we are met with an
                 imperative need for tools that promote understanding
                 and explanations over data-related operations. Data
                 management research on explanations has focused on the
                 assumption that data resides in a single dataset, under
                 one common schema. But the reality of today's data is
                 that it is frequently unintegrated, coming from
                 different sources with different schemas. When
                 different datasets provide different answers to
                 semantically similar questions, understanding the
                 reasons for the discrepancies is challenging and cannot
                 be handled by the existing single-dataset solutions. In
                 this paper, we propose explain3D, a framework for
                 explaining the disagreements across disjoint datasets
                 (3D). Explain3D focuses on identifying the reasons for
                 the differences in the results of two semantically
                 similar queries operating on two datasets with
                 potentially different schemas. Our framework leverages
                 the queries to perform a semantic mapping across the
                 relevant parts of their provenance; discrepancies in
                 this mapping point to causes of the queries'
                 differences. Exploiting the queries gives explain3D an
                 edge over traditional schema matching and record
                 linkage techniques, which are query-agnostic. Our work
                 makes the following contributions: (1) We formalize the
                 problem of deriving optimal explanations for the
                 differences of the results of semantically similar
                 queries over disjoint datasets. Our optimization
                 problem considers two types of explanations,
                 provenance-based and value-based, defined over an
                 evidence mapping, which makes our solution
                 interpretable. (2) We design a 3-stage framework for
                 solving the optimal explanation problem. (3) We develop
                 a smart-partitioning optimizer that improves the
                 efficiency of the framework by orders of magnitude. (4)
                 We experiment with real-world and synthetic data to
                 demonstrate that explain3D can derive precise
                 explanations efficiently, and is superior to
                 alternative methods based on integration techniques and
                 single-dataset explanation frameworks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Won:2019:DDS,
  author =       "Youjip Won and Sundoo Kim and Juseong Yun and Dam
                 Quang Tuan and Jiwon Seo",
  title =        "{DASH}: database shadowing for mobile {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "793--806",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317321",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this work, we propose Database Shadowing, or DASH,
                 which is a new crash recovery technique for SQLite
                 DBMS. DASH is a hybrid mixture of classical shadow
                 paging and logging. DASH addresses four major issues in
                 the current SQLite journal modes: the performance and
                 write amplification issues of the rollback mode and the
                 storage space requirement and tail latency issues of
                 the WAL mode. DASH exploits two unique characteristics
                 of SQLite: the database files are small and the
                 transactions are entirely serialized. DASH consists of
                 three key ingredients Aggregate Update, Atomic Exchange
                 and Version Reset. Aggregate Update eliminates the
                 redundant write overhead and the requirement to
                 maintain multiple snapshots both of which are inherent
                 in the out-of-place update. Atomic Exchange resolves
                 the overhead of updating the locations of individual
                 database pages exploiting order-preserving nature of
                 the metadata update operation in modern filesystem.
                 Version Reset makes the result of the Atomic Exchange
                 durable without relying on expensive filesystem
                 journaling. The salient aspect of DASH lies in its
                 simplicity and compatibility with the legacy. DASH does
                 not require any modifications in the underlying
                 filesystem or the database organization. It requires
                 only 451 LOC to implement. In Cyclomatic Complexity
                 score, which represents software complexity, DASH
                 renders 33\% lower (simpler) mark than PERSIST and WAL
                 modes of SQLite. We implement DASH for SQLite on
                 Android and extensively evaluate it on widely used
                 smartphone devices. DASH yields 4x performance gain
                 over PERSIST mode (default journaling mode). Compared
                 to WAL mode (the fastest journaling mode), DASH uses
                 only 2.5\% of the storage space on average. The
                 transaction latency of DASH at 99.9\% is one fourth of
                 that of WAL mode.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2019:AGL,
  author =       "Zeke Wang and Kaan Kara and Hantian Zhang and Gustavo
                 Alonso and Onur Mutlu and Ce Zhang",
  title =        "Accelerating generalized linear models with
                 {MLWeaving}: a one-size-fits-all system for
                 any-precision learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "807--821",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317322",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Learning from the data stored in a database is an
                 important function increasingly available in relational
                 engines. Methods using lower precision input data are
                 of special interest given their overall higher
                 efficiency. However, in databases, these methods have a
                 hidden cost: the quantization of the real value into a
                 smaller number is an expensive step. To address this
                 issue, we present MLWeaving, a data structure and
                 hardware acceleration technique intended to speed up
                 learning of generalized linear models over low
                 precision data. MLWeaving provides a compact in-memory
                 representation that enables the retrieval of data at
                 any level of precision. MLWeaving also provides a
                 highly efficient implementation of stochastic gradient
                 descent on FPGAs and enables the dynamic tuning of
                 precision, instead of using a fixed precision level
                 during learning. Experimental results show that
                 MLWeaving converges up to 16x faster than
                 low-precision implementations of first-order methods on
                 CPUs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jankov:2019:DRC,
  author =       "Dimitrije Jankov and Shangyu Luo and Binhang Yuan and
                 Zhuhua Cai and Jia Zou and Chris Jermaine and Zekai J.
                 Gao",
  title =        "Declarative recursive computation on an {RDBMS}: or,
                 why you should use a database for distributed machine
                 learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "7",
  pages =        "822--835",
  month =        mar,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3317315.3317323",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 20 17:32:19 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A number of popular systems, most notably Google's
                 TensorFlow, have been implemented from the ground up to
                 support machine learning tasks. We consider how to make
                 a very small set of changes to a modern relational
                 database management system (RDBMS) to make it suitable
                 for distributed learning computations. Changes include
                 adding better support for recursion, and optimization
                 and execution of very large compute plans. We also show
                 that there are key advantages to using an RDBMS as a
                 machine learning platform. In particular, learning
                 based on a database management system allows for
                 trivial scaling to large data sets and especially large
                 models, where different computational units operate on
                 different parts of a model that may be too large to fit
                 into RAM.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ghandeharizadeh:2019:DIE,
  author =       "Shahram Ghandeharizadeh and Hieu Nguyen",
  title =        "Design, implementation, and evaluation of write-back
                 policy with cache augmented data stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "836--849",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324302",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Cache Augmented Data Store (CADS) architecture
                 extends a persistent data store with an in-memory cache
                 manager. It is widely deployed to support
                 read-intensive workloads. However, its write-around and
                 write-through policies prevent the caching tier from
                 absorbing write load. This means the data store layer
                 must scale to process writes even when the extra
                 capacity is not needed for read load. We address this
                 limitation by devising a write-back technique to enable
                 the caching layer to process both reads and writes.
                 This technique preserves ACID transactions. We present
                 a client side implementation of write-back and evaluate
                 it using the YCSB, BG, and TPC-C benchmarks. In
                 addition, we compare our write-back with (a) write-back
                 policy of a data store such as MongoDB and (b)
                 write-back policy of a host-side cache such as
                 Flashcache.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nguyen:2019:UGE,
  author =       "Thanh Tam Nguyen and Matthias Weidlich and Hongzhi Yin
                 and Bolong Zheng and Quoc Viet Hung Nguyen and Bela
                 Stantic",
  title =        "User guidance for efficient fact checking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "850--863",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324303",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Web constitutes a valuable source of information.
                 In recent years, it fostered the construction of
                 large-scale knowledge bases, such as Freebase, YAGO,
                 and DBpedia. The open nature of the Web, with content
                 potentially being generated by everyone, however, leads
                 to inaccuracies and misinformation. Construction and
                 maintenance of a knowledge base thus has to rely on
                 fact checking, an assessment of the credibility of
                 facts. Due to an inherent lack of ground truth
                 information, such fact checking cannot be done in a
                 purely automated manner, but requires human
                 involvement. In this paper, we propose a comprehensive
                 framework to guide users in the validation of facts,
                 striving for a minimisation of the invested effort. Our
                 framework is grounded in a novel probabilistic model
                 that combines user input with automated credibility
                 inference. Based thereon, we show how to guide users in
                 fact checking by identifying the facts for which
                 validation is most beneficial. Moreover, our framework
                 includes techniques to reduce the manual effort
                 invested in fact checking by determining when to stop
                 the validation and by supporting efficient batching
                 strategies. We further show how to handle fact checking
                 in a streaming setting. Our experiments with three
                 real-world datasets demonstrate the efficiency and
                 effectiveness of our framework: A knowledge base of
                 high quality, with a precision of above 90\%, is
                 constructed with only a half of the validation effort
                 required by baseline techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ke:2019:DCR,
  author =       "Xiangyu Ke and Arijit Khan and Leroy Lim Hong Quan",
  title =        "An in-depth comparison of $s$--$t$ reliability
                 algorithms over uncertain graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "864--876",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324304",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Uncertain, or probabilistic, graphs have been
                 increasingly used to represent noisy linked data in
                 many emerging applications, and have recently attracted
                 the attention of the database research community. A
                 fundamental problem on uncertain graphs is the s-t
                 reliability, which measures the probability that a
                 target node t is reachable from a source node s in a
                 probabilistic (or uncertain) graph, i.e., a graph where
                 every edge is assigned a probability of existence. Due
                 to the inherent complexity of the s-t reliability
                 estimation problem (\#P-hard), various sampling and
                 indexing based efficient algorithms were proposed in
                 the literature. However, since they have not been
                 thoroughly compared with each other, it is not clear
                 whether the later algorithm outperforms the earlier
                 ones. More importantly, the comparison framework,
                 datasets, and metrics were often not consistent (e.g.,
                 different convergence criteria were employed to find
                 the optimal number of samples) across these works. We
                 address this serious concern by re-implementing six
                 state-of-the-art s-t reliability estimation methods in
                 a common system and code base, using several medium and
                 large-scale, real-world graph datasets, identical
                 evaluation metrics, and query workloads. Through our
                 systematic and in-depth analysis of experimental
                 results, we report surprising findings, such as many
                 follow-up algorithms can actually be several orders of
                 magnitude inefficient, less accurate, and more memory
                 intensive compared to the ones that were proposed
                 earlier. We conclude by discussing our recommendations
                 on the road ahead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2019:DSP,
  author =       "Wenfei Fan and Chunming Hu and Muyang Liu and Ping Lu
                 and Qiang Yin and Jingren Zhou",
  title =        "Dynamic scaling for parallel graph computations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "877--890",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324305",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper studies scaling out/in to cope with load
                 surges. Given a graph G that is vertex-partitioned and
                 distributed across n processors, it is to add (resp.
                 remove) k processors and re-distribute G across n + k
                 (resp. n - k) processors such that the load among
                 the processors is balanced, and its replication factor
                 and migration cost are minimized. We show that this
                 tri-criteria optimization problem is intractable, even
                 when k is a constant and when either load balancing or
                 minimum migration is not required. Nonetheless, we
                 propose two parallel solutions to dynamic scaling. One
                 consists of approximation algorithms by extending
                 consistent hashing. Given a load balancing factor above
                 a lower bound, the algorithms guarantee provable bounds
                 on both replication factor and migration cost. The
                 other is a generic scaling scheme. Given any existing
                 vertex-partitioner VP of users' choice, it adaptively
                 scales VP in and out such that it incurs minimum
                 migration cost, and ensures balance and replication
                 factors within a bound relative to that of VP. Using
                 real-life and synthetic graphs, we experimentally
                 verify the efficiency, effectiveness and scalability of
                 the solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2019:TTR,
  author =       "Dongsheng Li and Yiming Zhang and Jinyan Wang and
                 Kian-Lee Tan",
  title =        "{TopoX}: topology refactorization for efficient graph
                 partitioning and processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "891--905",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324306",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional graph partitioning methods attempt to both
                 minimize communication cost and guarantee load
                 balancing in computation. However, the skewed degree
                 distribution of natural graphs makes it difficult to
                 simultaneously achieve the two objectives. This paper
                 proposes topology refactorization (TR), a
                 topology-aware method allowing graph-parallel systems
                 to separately handle the two objectives:
                 refactorization is mainly focused on reducing
                 communication cost, and partitioning is mainly targeted
                 for balancing the load. TR transforms a skewed graph
                 into a more communication-efficient topology through
                 fusion and fission, where the fusion operation
                 organizes a set of neighboring low-degree vertices into
                 a super-vertex, and the fission operation splits a
                 high-degree vertex into a set of sibling sub-vertices.
                 Based on TR, we design an efficient graph-parallel
                 system (TopoX) which pipelines refactorization with
                 partitioning to both reduce communication cost and
                 balance computation load. Prototype evaluation shows
                 that TopoX outperforms state-of-the-art PowerLyra by up
                 to 78.5\% (from 37.2\%) on real-world graphs and is
                 significantly faster than other graph-parallel systems,
                 while only introducing small refactorization overhead
                 and memory consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Avdiukhin:2019:MDB,
  author =       "Dmitrii Avdiukhin and Sergey Pupyrev and Grigory
                 Yaroslavtsev",
  title =        "Multi-dimensional balanced graph partitioning via
                 projected gradient descent",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "906--919",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324307",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Motivated by performance optimization of large-scale
                 graph processing systems that distribute the graph
                 across multiple machines, we consider the balanced
                 graph partitioning problem. Compared to most of the
                 previous work, we study the multi-dimensional variant
                 in which balance according to multiple weight functions
                 is required. As we demonstrate by experimental
                 evaluation, such multi-dimensional balance is essential
                 for achieving performance improvements for typical
                 distributed graph processing workloads. We propose a
                 new scalable technique for the multidimensional
                 balanced graph partitioning problem. It is based on
                 applying randomized projected gradient descent to a
                 non-convex continuous relaxation of the objective. We
                 show how to implement the new algorithm efficiently in
                 both theory and practice utilizing various approaches
                 for the projection step. Experiments with large-scale
                 graphs containing up to hundreds of billions of edges
                 indicate that our algorithm has superior performance
                 compared to the state of the art.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2019:EDS,
  author =       "Lei Cao and Yizhou Yan and Samuel Madden and Elke A.
                 Rundensteiner and Mathan Gopalsamy",
  title =        "Efficient discovery of sequence outlier patterns",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "920--932",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324308",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern Internet of Things (IoT) applications
                 generate massive amounts of time-stamped data, much of
                 it in the form of discrete, symbolic sequences. In this
                 work, we present a new system called TOP that deTects
                 Outlier Patterns from these sequences. To solve the
                 fundamental limitation of existing pattern mining
                 semantics that miss outlier patterns hidden inside of
                 larger frequent patterns, TOP offers new pattern
                 semantics based on contextual patterns that distinguish
                 the independent occurrence of a pattern from its
                 occurrence as part of its super-pattern. We present
                 efficient algorithms for the mining of this new class
                 of contextual patterns. In particular, in contrast to
                 the bottom-up strategy for state-of-the-art pattern
                 mining techniques, our top-down Reduce strategy piggy
                 backs pattern detection with the detection of the
                 context in which a pattern occurs. Our approach
                 achieves linear time complexity in the length of the
                 input sequence. Effective optimization techniques such
                 as context-driven search space pruning and inverted
                 index-based outlier pattern detection are also proposed
                 to further speed up contextual pattern mining. Our
                 experimental evaluation demonstrates the effectiveness
                 of TOP at capturing meaningful outlier patterns in
                 several real-world IoT use cases. We also demonstrate
                 the efficiency of TOP, showing it to be up to 2 orders
                 of magnitude faster than adapting state-of-the-art
                 mining to produce this new class of contextual outlier
                 patterns, allowing us to scale outlier pattern mining
                 to large sequence datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bogatov:2019:CEO,
  author =       "Dmytro Bogatov and George Kollios and Leonid Reyzin",
  title =        "A comparative evaluation of order-revealing encryption
                 schemes and secure range-query protocols",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "8",
  pages =        "933--947",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3324301.3324309",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database query evaluation over encrypted data can
                 allow database users to maintain the privacy of their
                 data while outsourcing data processing.
                 Order-Preserving Encryption (OPE) and Order-Revealing
                 Encryption (ORE) were designed to enable efficient
                 query execution, but provide only partial privacy. More
                 private protocols, based on Searchable Symmetric
                 Encryption (SSE), Oblivious RAM (ORAM) or custom
                 encrypted data structures, have also been designed. In
                 this paper, we develop a framework to provide the first
                 comprehensive comparison among a number of range query
                 protocols that ensure varying levels of privacy of user
                 data. We evaluate five ORE-based and five generic range
                 query protocols. We analyze and compare them both
                 theoretically and experimentally and measure their
                 performance over database indexing and query
                 evaluation. We report not only execution time but also
                 I/O performance, communication amount, and usage of
                 cryptographic primitive operations. Our comparison
                 reveals some interesting insights concerning the
                 relative security and performance of these approaches
                 in database settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Orakzai:2019:HFM,
  author =       "Faisal Orakzai and Toon Calders and Torben Bach
                 Pedersen",
  title =        "$ k / 2$-hop: fast mining of convoy patterns with
                 effective pruning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "9",
  pages =        "948--960",
  month =        may,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3329772.3329773",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increase of devices equipped with location
                 sensors, mining spatio-temporal data for interesting
                 behavioral patterns has gained attention in recent
                 years. One of such well-known patterns is the convoy
                 pattern which can be used, e.g., to find groups of
                 people moving together in public transport or to
                 prevent traffic jams. A convoy consists of at least m
                 objects moving together for at least k consecutive time
                 instants where m and k are user-defined parameters.
                 Convoy mining is an expensive task and existing
                 sequential algorithms do not scale to real-life dataset
                 sizes. Existing sequential as well as parallel
                 algorithms require a complex set of data-dependent
                 parameters which are hard to set and tune. Therefore,
                 in this paper, we propose a new fast exact sequential
                 convoy pattern mining algorithm ``k/2-hop'' that is
                 free of data-dependent parameters. The proposed
                 algorithm processes the data corresponding to a few
                 specific key timestamps at each step and quickly prunes
                 objects with no possibility of forming a convoy. Thus,
                 only a very small portion of the complete dataset is
                 considered for mining convoys. Our experimental results
                 show that k/2-hop outperforms existing sequential as
                 well as parallel convoy pattern mining algorithms by
                 orders of magnitude, and scales to larger datasets
                 which existing algorithms fail on.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2019:BAD,
  author =       "Ji Sun and Zeyuan Shang and Guoliang Li and Dong Deng
                 and Zhifeng Bao",
  title =        "Balance-aware distributed string similarity-based
                 query processing system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "9",
  pages =        "961--974",
  month =        may,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3329772.3329774",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data analysts spend more than 80\% of time on data
                 cleaning and integration in the whole process of data
                 analytics due to data errors and inconsistencies.
                 Similarity-based query processing is an important way
                 to tolerate the errors and inconsistencies. However,
                 similarity-based query processing is rather costly and
                 traditional database cannot afford such expensive
                 requirement. In this paper, we develop a distributed
                 in-memory similarity-based query processing system
                 called Dima. Dima supports four core similarity
                 operations, i.e., similarity selection, similarity
                 join, top-k selection and top-k join. Dima extends
                 SQL for users to easily invoke these similarity-based
                 operations in their data analysis tasks. To avoid
                 expensive data transmission in a distributed
                 environment, we propose balance-aware signatures where
                 two records are similar if they share common
                 signatures, and we can adaptively select the signatures
                 to balance the workload. Dima builds signature-based
                 global indexes and local indexes to support similarity
                 operations. Since Spark is one of the widely adopted
                 distributed in-memory computing systems, we have
                 seamlessly integrated Dima into Spark and developed
                 effective query optimization techniques in Spark. To
                 the best of our knowledge, this is the first
                 full-fledged distributed in-memory system that can
                 support complex similarity-based query processing on
                 large-scale datasets. We have conducted extensive
                 experiments on four real-world datasets. Experimental
                 results show that Dima outperforms state-of-the-art
                 studies by 1--3 orders of magnitude and has good
                 scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ruan:2019:FGS,
  author =       "Pingcheng Ruan and Gang Chen and Tien Tuan Anh Dinh
                 and Qian Lin and Beng Chin Ooi and Meihui Zhang",
  title =        "Fine-grained, secure and efficient data provenance on
                 blockchain systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "9",
  pages =        "975--988",
  month =        may,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3329772.3329775",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The success of Bitcoin and other cryptocurrencies
                 bring enormous interest to blockchains. A blockchain
                 system implements a tamper-evident ledger for recording
                 transactions that modify some global states. The system
                 captures entire evolution history of the states. The
                 management of that history, also known as data
                 provenance or lineage, has been studied extensively in
                 database systems. However, querying data history in
                 existing blockchains can only be done by replaying all
                 transactions. This approach is applicable to
                 large-scale, offline analysis, but is not suitable for
                 online transaction processing. We present LineageChain,
                 a fine-grained, secure and efficient provenance system
                 for blockchains. LineageChain exposes provenance
                 information to smart contracts via simple and elegant
                 interfaces, thereby enabling a new class of blockchain
                 applications whose execution logics depend on
                 provenance information at runtime. LineageChain
                 captures provenance during contract execution, and
                 efficiently stores it in a Merkle tree. LineageChain
                 provides a novel skip list index designed for
                 supporting efficient provenance query processing. We
                 have implemented LineageChain on top of Hyperledger and
                 a blockchain-optimized storage system called ForkBase.
                 Our extensive evaluation of LineageChain demonstrates
                 its benefits to the new class of blockchain
                 applications, its efficient query, and its small
                 storage overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Choi:2019:PTK,
  author =       "Dalsu Choi and Chang-Sup Park and Yon Dohn Chung",
  title =        "Progressive top-$k$ subarray query processing in array
                  databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "9",
  pages =        "989--1001",
  month =        may,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3329772.3329776",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Unprecedented amounts of multidimensional array data
                  are currently being generated in many fields. These
                  multidimensional array data naturally and efficiently
                  fit into the array data model, and many array
                  management systems based on the array data model have
                  appeared. Accordingly, the requirement for data
                  exploration methods for large multidimensional array
                  data has also increased. In this paper, we propose a
                  method for efficient top-$k$ subarray query processing
                  in array databases, which is one of the most important
                  query types for exploring multidimensional data. First,
                  we define novel top-$k$ query models for array
                  databases: overlap-allowing and disjoint top-$k$
                  subarray queries. Second, we propose a suite of
                  top-$k$ subarray query processing methods, called PPTS
                  and extend them to distributed processing. Finally, we
                  present the results of extensive experiments using real
                  datasets from an array database, which show that our
                  proposed methods outperform existing na{\"\i}ve
                  methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@article{Hoffmann:2019:MLC,
  author          = {Moritz Hoffmann and Andrea Lattuada and Frank
                     McSherry},
  title           = {{Megaphone}: latency-conscious state migration for
                     distributed streaming dataflows},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {9},
  pages           = {1002--1015},
  month           = may,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3329772.3329777},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {We design and implement Megaphone, a data migration
                     mechanism for stateful distributed dataflow engines
                     with latency objectives. When compared to existing
                     migration mechanisms, Megaphone has the following
                     differentiating characteristics: (i) migrations can be
                     subdivided to a configurable granularity to avoid
                     latency spikes, and (ii) migrations can be prepared
                     ahead of time to avoid runtime coordination. Megaphone
                     is implemented as a library on an unmodified timely
                     dataflow implementation, and provides an operator
                     interface compatible with its existing APIs. We
                     evaluate Megaphone on established benchmarks with
                     varying amounts of state and observe that compared to
                     na{\"\i}ve approaches Megaphone reduces service
                     latencies during reconfiguration by orders of magnitude
                     without significantly increasing steady-state
                     overhead.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Tam:2019:ADR,
  author          = {Nguyen Thanh Tam and Matthias Weidlich and Bolong
                     Zheng and Hongzhi Yin and Nguyen Quoc Viet Hung and
                     Bela Stantic},
  title           = {From anomaly detection to rumour detection using data
                     streams of social platforms},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {9},
  pages           = {1016--1029},
  month           = may,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3329772.3329778},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {Social platforms became a major source of rumours.
                     While rumours can have severe real-world implications,
                     their detection is notoriously hard: Content on social
                     platforms is short and lacks semantics; it spreads
                     quickly through a dynamically evolving network; and
                     without considering the context of content, it may be
                     impossible to arrive at a truthful interpretation.
                     Traditional approaches to rumour detection, however,
                     exploit solely a single content modality, e.g., social
                     media posts, which limits their detection accuracy. In
                     this paper, we cope with the aforementioned challenges
                     by means of a multi-modal approach to rumour detection
                     that identifies anomalies in both, the entities (e.g.,
                     users, posts, and hashtags) of a social platform and
                     their relations. Based on local anomalies, we show how
                     to detect rumours at the network level, following a
                     graph-based scan approach. In addition, we propose
                     incremental methods, which enable us to detect rumours
                     using streaming data of social platforms. We illustrate
                     the effectiveness and efficiency of our approach with a
                     real-world dataset of 4M tweets with more than 1000
                     rumours.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Gupta:2019:OIT,
  author          = {Peeyush Gupta and Yin Li and Sharad Mehrotra and Nisha
                     Panwar and Shantanu Sharma and Sumaya Almanee},
  title           = {{Obscure}: information-theoretic oblivious and
                     verifiable aggregation queries},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {9},
  pages           = {1030--1043},
  month           = may,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3329772.3329779},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {Despite extensive research on cryptography, secure and
                     efficient query processing over outsourced data remains
                     an open challenge. We develop communication-efficient
                     and information-theoretically secure algorithms for
                     privacy-preserving aggregation queries using
                     multi-party computation (MPC). Specifically, query
                     processing techniques over secret-shared data
                     outsourced by single or multiple database owners are
                     developed. These algorithms allow a user to execute
                     queries on the secret-shared database and also prevent
                     the network and the (adversarial) clouds to learn the
                     user's queries, results, or the database. We further
                     develop (non-mandatory) privacy-preserving result
                     verification algorithms that detect malicious
                     behaviors, and experimentally validate the efficiency
                     of our approach over large datasets, the size of which
                     prior approaches to secret-sharing or MPC systems have
                     not scaled to.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Dutt:2019:SER,
  author          = {Anshuman Dutt and Chi Wang and Azade Nazi and Srikanth
                     Kandula and Vivek Narasayya and Surajit Chaudhuri},
  title           = {Selectivity estimation for range predicates using
                     lightweight models},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {9},
  pages           = {1044--1057},
  month           = may,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3329772.3329780},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {Query optimizers depend on selectivity estimates of
                     query predicates to produce a good execution plan. When
                     a query contains multiple predicates, today's
                     optimizers use a variety of assumptions, such as
                     independence between predicates, to estimate
                     selectivity. While such techniques have the benefit of
                     fast estimation and small memory footprint, they often
                     incur large selectivity estimation errors. In this
                     work, we reconsider selectivity estimation as a
                     regression problem. We explore application of neural
                     networks and tree-based ensembles to the important
                     problem of selectivity estimation of multi-dimensional
                     range predicates. While their straightforward
                     application does not outperform even simple baselines,
                     we propose two simple yet effective design choices,
                     i.e., regression label transformation and feature
                     engineering, motivated by the selectivity estimation
                     context. Through extensive empirical evaluation across
                     a variety of datasets, we show that the proposed models
                     deliver both highly accurate estimates as well as fast
                     estimation.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Yuan:2019:CSP,
  author          = {Ye Yuan and Xiang Lian and Guoren Wang and Yuliang Ma
                     and Yishu Wang},
  title           = {Constrained shortest path query in a large
                     time-dependent graph},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {10},
  pages           = {1058--1070},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3339490.3339491},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:02 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {The constrained shortest path (CSP) query over static
                     graphs has been extensively studied, since it has wide
                     applications in transportation networks,
                     telecommunication networks and etc. Such networks are
                     dynamic and evolve over time, being modeled as
                     time-dependent graphs. Therefore, in this paper, we
                     study the CSP query over a large time-dependent graph.
                     Specifically, we study the point CSP (PCSP) query and
                     interval CSP (ICSP) query. We formally prove that it is
                     NP-complete to process a PCSP query and at least
                     EXPSPACE to answer an ICSP query. We propose
                     approximate sequential algorithms to answer the PCSP
                     and ICSP queries efficiently. We also develop parallel
                     algorithms for the queries that guarantee to scale with
                     big time-dependent graphs. Using real-life graphs, we
                     experimentally verify the efficiency and scalability of
                     our algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Chu:2019:FTC,
  author          = {Lingyang Chu and Zhefeng Wang and Jian Pei and Yanyan
                     Zhang and Yu Yang and Enhong Chen},
  title           = {Finding theme communities from database networks},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {10},
  pages           = {1071--1084},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3339490.3339492},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:02 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {Given a database network where each vertex is
                     associated with a transaction database, we are
                     interested in finding theme communities. Here, a theme
                     community is a cohesive subgraph such that a common
                     pattern is frequent in all transaction databases
                     associated with the vertices in the subgraph. Finding
                     all theme communities from a database network enjoys
                     many novel applications. However, it is challenging
                     since even counting the number of all theme communities
                     in a database network is \#P-hard. Inspired by the
                     observation that a theme community shrinks when the
                     length of the pattern increases, we investigate several
                     properties of theme communities and develop TCFI, a
                     scalable algorithm that uses these properties to
                     effectively prune the patterns that cannot form any
                     theme community. We also design TC-Tree, a scalable
                     algorithm that decomposes and indexes theme communities
                     efficiently. Retrieving a ranked list of theme
                     communities from a TC-Tree of hundreds of millions of
                     theme communities takes less than 1 second. Extensive
                     experiments and a case study demonstrate the
                     effectiveness and scalability of TCFI and TC-Tree in
                     discovering and querying meaningful theme communities
                     from large database networks.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Pan:2019:RSB,
  author          = {James J. Pan and Guoliang Li and Juntao Hu},
  title           = {{Ridesharing}: simulator, benchmark, and evaluation},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {10},
  pages           = {1085--1098},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3339490.3339493},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:02 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {Ridesharing is becoming a popular mode of
                     transportation with profound effects on the industry.
                     Recent algorithms for vehicle-to-customer matching have
                     been developed; yet cross-study evaluations of their
                     performance and applicability to real-world ridesharing
                     are lacking. Evaluation is complicated by the online
                     and real-time nature of the ridesharing problem. In
                     this paper, we develop a simulator for evaluating
                     ridesharing algorithms, and we provide a set of
                     benchmarks to test a wide range of scenarios
                     encountered in the real world. These scenarios include
                     different road networks, different numbers of vehicles,
                     larger scales of customer requests, and others. We
                     apply the benchmarks to several state-of-the-art search
                     and join based ridesharing algorithms to demonstrate
                     the usefulness of the simulator and the benchmarks. We
                     find quickly-computable heuristics outperforming other
                     more complex methods, primarily due to faster
                     computation speed. Our work points the direction for
                     designing and evaluating future ridesharing
                     algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Lai:2019:DSM,
  author          = {Longbin Lai and Zhu Qing and Zhengyi Yang and Xin Jin
                     and Zhengmin Lai and Ran Wang and Kongzhang Hao and
                     Xuemin Lin and Lu Qin and Wenjie Zhang and Ying Zhang
                     and Zhengping Qian and Jingren Zhou},
  title           = {Distributed subgraph matching on timely dataflow},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {10},
  pages           = {1099--1112},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3339490.3339494},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:02 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {Recently there emerge many distributed algorithms that
                     aim at solving subgraph matching at scale. Existing
                     algorithm-level comparisons failed to provide a
                     systematic view of distributed subgraph matching mainly
                     due to the intertwining of strategy and optimization.
                     In this paper, we identify four strategies and three
                     general-purpose optimizations from representative
                     state-of-the-art algorithms. We implement the four
                     strategies with the optimizations based on the common
                     Timely dataflow system for systematic strategy-level
                     comparison. Our implementation covers all
                     representative algorithms. We conduct extensive
                     experiments for both unlabelled matching and labelled
                     matching to analyze the performance of distributed
                     subgraph matching under various settings, which is
                     finally summarized as a practical guide.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Qiao:2019:HDS,
  author          = {Shi Qiao and Adrian Nicoara and Jin Sun and Marc
                     Friedman and Hiren Patel and Jaliya Ekanayake},
  title           = {Hyper dimension shuffle: efficient data repartition at
                     petabyte scale in {SCOPE}},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {10},
  pages           = {1113--1125},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3339490.3339495},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:02 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {In distributed query processing, data shuffle is one
                     of the most costly operations. We examined scaling
                     limitations to data shuffle that current systems and
                     the research literature do not solve. As the number of
                     input and output partitions increases, na{\"\i}ve
                     shuffling will result in high fan-out and fan-in. There
                     are practical limits to fan-out, as a consequence of
                     limits on memory buffers, network ports and I/O
                     handles. There are practical limits to fan-in because
                     it multiplies the communication errors due to faults in
                     commodity clusters impeding progress. Existing
                     solutions that limit fan-out and fan-in do so at the
                     cost of scaling quadratically in the number of nodes in
                     the data flow graph. This dominates the costs of
                     shuffling large datasets. We propose a novel algorithm
                     called Hyper Dimension Shuffle that we have introduced
                     in production in SCOPE, Microsoft's internal big data
                     analytics system. Hyper Dimension Shuffle is inspired
                     by the divide and conquer concept, and utilizes a
                     recursive partitioner with intermediate aggregations.
                     It yields quasilinear complexity of the shuffling graph
                     with tight guarantees on fan-out and fan-in. We
                     demonstrate how it avoids the shuffling graph blow-up
                     of previous algorithms to shuffle at petabyte-scale
                     efficiently on both synthetic benchmarks and real
                     applications.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@article{Cormode:2019:ARQ,
  author          = {Graham Cormode and Tejas Kulkarni and Divesh
                     Srivastava},
  title           = {Answering range queries under local differential
                     privacy},
  journal         = j-PROC-VLDB-ENDOWMENT,
  volume          = {12},
  number          = {10},
  pages           = {1126--1138},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.14778/3339490.3339496},
  ISSN            = {2150-8097},
  ISSN-L          = {2150-8097},
  bibdate         = {Wed Oct 2 06:49:02 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract        = {Counting the fraction of a population having an input
                     within a specified interval i.e. a range query, is a
                     fundamental data analysis primitive. Range queries can
                     also be used to compute other core statistics such as
                     quantiles, and to build prediction models. However,
                     frequently the data is subject to privacy concerns when
                     it is drawn from individuals, and relates for example
                     to their financial, health, religious or political
                     status. In this paper, we introduce and analyze methods
                     to support range queries under the local variant of
                     differential privacy [23], an emerging standard for
                     privacy-preserving data analysis. The local model
                     requires that each user releases a noisy view of her
                     private data under a privacy guarantee. While many
                     works address the problem of range queries in the
                     trusted aggregator setting, this problem has not been
                     addressed specifically under untrusted aggregation
                     (local DP) model even though many primitives have been
                     developed recently for estimating a discrete
                     distribution. We describe and analyze two classes of
                     approaches for range queries, based on hierarchical
                     histograms and the Haar wavelet transform. We show that
                     both have strong theoretical accuracy guarantees on
                     variance. In practice, both methods are fast and
                     require minimal computation and communication
                     resources. Our experiments show that the wavelet
                     approach is most accurate in high privacy settings,
                     while the hierarchical approach dominates for weaker
                     privacy requirements.},
  acknowledgement = ack-nhfb,
  ajournal        = {Proc. VLDB Endowment},
  fjournal        = {Proceedings of the VLDB Endowment},
  journal-URL     = {http://portal.acm.org/citation.cfm?id=J1174},
}

@Article{Wang:2019:VPB,
  author =       "Kai Wang and Xuemin Lin and Lu Qin and Wenjie Zhang
                  and Ying Zhang",
  title =        "Vertex priority based butterfly counting for
                  large-scale bipartite networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "10",
  pages =        "1139--1152",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3339490.3339497",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Bipartite networks are of great importance in many
                  real-world applications. In bipartite networks,
                  butterfly (i.e., a complete $2 \times 2$ biclique) is
                  the smallest non-trivial cohesive structure and plays a
                  key role. In this paper, we study the problem of
                  efficient counting the number of butterflies in
                  bipartite networks. The most advanced techniques are
                  based on enumerating wedges which is the dominant cost
                  of counting butterflies. Nevertheless, the existing
                  algorithms cannot efficiently handle large-scale
                  bipartite networks. This becomes a bottleneck in
                  large-scale applications. In this paper, instead of the
                  existing layer-priority-based techniques, we propose a
                  vertex-priority-based paradigm BFC-VP to enumerate much
                  fewer wedges; this leads to a significant improvement
                  of the time complexity of the state-of-the-art
                  algorithms. In addition, we present cache-aware
                  strategies to further improve the time efficiency while
                  theoretically retaining the time complexity of BFC-VP.
                  Moreover, we also show that our proposed techniques can
                  work efficiently in external and parallel contexts. Our
                  extensive empirical studies demonstrate that the
                  proposed techniques can speed up the state-of-the-art
                  techniques by up to two orders of magnitude for the
                  real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2019:BVS,
  author =       "Yang Cao and Wenfei Fan and Tengfei Yuan",
  title =        "Block as a value for {SQL} over {NoSQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "10",
  pages =        "1153--1166",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3339490.3339498",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper presents Zidian, a middleware for
                  key--value (KV) stores to speed up SQL query evaluation
                  over NoSQL. As opposed to common practice that takes a
                  tuple id or primary key as key and the entire tuple as
                  value, Zidian proposes a block-as-a-value model BaaV.
                  BaaV represents a relation as keyed blocks $(k, B)$,
                  where $k$ is a key of a block (a set) $B$ of partial
                  tuples. We extend relational algebra to BaaV. We show
                  that under BaaV, Zidian substantially reduces data
                  access and communication cost. We provide
                  characterizations (sufficient and necessary conditions)
                  for (a) result-preserving queries, i.e., queries
                  covered by available BaaV stores, (b) scan-free
                  queries, i.e., queries that can be evaluated without
                  scanning any table, and (c) bounded queries, i.e.,
                  queries that can be answered by accessing a bounded
                  amount of data. We show that in parallel processing,
                  Zidian guarantees (a) no scans for scan-free queries,
                  (b) bounded communication cost for bounded queries; and
                  (c) parallel scalability, i.e., speed up when adding
                  processors. Moreover, Zidian can be plugged into
                  existing SQL-over-NoSQL systems and retains horizontal
                  scalability. Using benchmark and real-life data, we
                  empirically verify that Zidian improves existing
                  SQL-over-NoSQL systems by 2 orders of magnitude on
                  average.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tangwongsan:2019:OGO,
  author =       "Kanat Tangwongsan and Martin Hirzel and Scott
                  Schneider",
  title =        "Optimal and general out-of-order sliding-window
                  aggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "10",
  pages =        "1167--1180",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3339490.3339499",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Sliding-window aggregation derives a user-defined
                  summary of the most-recent portion of a data stream.
                  For in-order streams, each window change can be handled
                  in $O(1)$ time even when the aggregation operator is
                  not invertible. But streaming data often arrive
                  inherently out-of-order, e.g., due to clock drifts and
                  communication delays. For such streams, prior work
                  resorted to latency-prone buffering or spent
                  $O(\log n)$ time for every window change, where $n$ is
                  the instantaneous window size. This paper presents
                  FiBA, a novel real-time sliding window aggregation
                  algorithm that optimally handles streams of varying
                  degrees of out-of-orderness. FiBA is as general as the
                  state-of-the-art and supports variable-sized windows.
                  An insert or evict takes amortized $O(\log d)$ time,
                  where $d$ is the distance of the change to the window's
                  boundary. This means $O(1)$ time for in-order arrivals
                  and nearly $O(1)$ time for slightly out-of-order
                  arrivals, tending to $O(\log n)$ time for the most
                  severely out-of-order arrivals. We also prove a
                  matching lower bound, showing optimality. At its heart,
                  the algorithm combines and extends finger searching,
                  lazy rebalancing, and position-aware partial
                  aggregates. Further, FiBA can answer range queries that
                  aggregate subwindows for window sharing. Finally, our
                  experiments show that FiBA performs well in practice
                  and conforms to the theoretical findings, with
                  significantly higher throughput than $O(\log n)$
                  algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tang:2019:CTR,
  author =       "Bo Tang and Kyriakos Mouratidis and Man Lung Yiu and
                 Zhenyu Chen",
  title =        "Creating top ranking options in the continuous option
                 and preference space",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "10",
  pages =        "1181--1194",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3339490.3339500",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Top-k queries are extensively used to retrieve the k
                  most relevant options (e.g., products, services,
                  accommodation alternatives, etc.) based on a weighted
                 scoring function that captures user preferences. In
                 this paper, we take the viewpoint of a business owner
                 who plans to introduce a new option to the market, with
                 a certain type of clientele in mind. Given a target
                 region in the consumer spectrum, we determine what
                 attribute values the new option should have, so that it
                  ranks among the top-k for any user in that region. Our
                 methodology can also be used to improve an existing
                 option, at the minimum modification cost, so that it
                 ranks consistently high for an intended type of
                 customers. This is the first work on competitive option
                 placement where no distinct user(s) are targeted, but a
                 general clientele type, i.e., a continuum of possible
                 preferences. Here also lies our main challenge (and
                 contribution), i.e., dealing with the interplay between
                 two continuous spaces: the targeted region in the
                 preference spectrum, and the option domain (where the
                 new option will be placed). At the core of our
                 methodology lies a novel and powerful interlinking
                 between the two spaces. Our algorithms offer exact
                 answers in practical response times, even for the
                 largest of the standard benchmark datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ma:2019:OBE,
  author =       "Hanchao Ma and Morteza Alipourlangouri and Yinghui Wu
                 and Fei Chiang and Jiaxing Pi",
  title =        "Ontology-based entity matching in attributed graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "10",
  pages =        "1195--1207",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3339490.3339501",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Keys for graphs incorporate the topology and value
                 constraints needed to uniquely identify entities in a
                 graph. They have been studied to support object
                 identification, knowledge fusion, and social network
                 reconciliation. Existing key constraints identify
                 entities as the matches of a graph pattern by subgraph
                 isomorphism, which enforce label equality on node
                 types. These constraints can be too restrictive to
                 characterize structures and node labels that are
                 syntactically different but semantically equivalent. We
                 propose a new class of key constraints, Ontological
                 Graph Keys (OGKs) that extend conventional graph keys
                 by ontological subgraph matching between entity labels
                 and an external ontology. We show that the implication
                 and validation problems for OGKs are each NP-complete.
                 To reduce the entity matching cost, we also provide an
                 algorithm to compute a minimal cover for OGKs. We then
                 study the entity matching problem with OGKs, and a
                 practical variant with a budget on the matching cost.
                 We develop efficient algorithms to perform entity
                 matching based on a (budgeted) Chase procedure. Using
                 real-world graphs, we experimentally verify the
                 efficiency and accuracy of OGK-based entity matching.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2019:RTD,
  author =       "Lu Chen and Yunjun Gao and Ziquan Fang and Xiaoye Miao
                 and Christian S. Jensen and Chenjuan Guo",
  title =        "Real-time distributed co-movement pattern detection on
                 streaming trajectories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "10",
  pages =        "1208--1220",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3339490.3339502",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the widespread deployment of mobile devices with
                 positioning capabilities, increasingly massive volumes
                 of trajectory data are being collected that capture the
                 movements of people and vehicles. This data enables
                 co-movement pattern detection, which is important in
                 applications such as trajectory compression and
                 future-movement prediction. Existing co-movement
                 pattern detection studies generally consider historical
                 data and thus propose offline algorithms. However,
                 applications such as future movement prediction need
                 real-time processing over streaming trajectories. Thus,
                 we investigate real-time distributed co-movement
                 pattern detection over streaming trajectories. Existing
                  offline methods assume that all data is available when
                 the processing starts. Nevertheless, in a streaming
                 setting, unbounded data arrives in real time, making
                 pattern detection challenging. To this end, we propose
                 a framework based on Apache Flink, which is designed
                 for efficient distributed streaming data processing.
                 The framework encompasses two phases: clustering and
                 pattern enumeration. To accelerate the clustering, we
                 use a range join based on two-layer indexing, and
                 provide techniques that eliminate unnecessary
                 verifications. To perform pattern enumeration
                 efficiently, we present two methods FBA and VBA that
                 utilize id-based partitioning. When coupled with bit
                 compression and candidate-based enumeration techniques,
                 we reduce the enumeration cost from exponential to
                 linear. Extensive experiments offer insight into the
                 efficiency of the proposed framework and its
                 constituent techniques compared with existing
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tan:2019:IIB,
  author =       "Jian Tan and Tieying Zhang and Feifei Li and Jie Chen
                 and Qixing Zheng and Ping Zhang and Honglin Qiao and
                 Yue Shi and Wei Cao and Rui Zhang",
  title =        "{iBTune}: individualized buffer tuning for large-scale
                 cloud databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "10",
  pages =        "1221--1234",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3339490.3339503",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Tuning the buffer size appropriately is critical to
                 the performance of a cloud database, since memory is
                 usually the resource bottleneck. For large-scale
                 databases supporting heterogeneous applications,
                 configuring the individual buffer sizes for a
                 significant number of database instances presents a
                 scalability challenge. Manual optimization is neither
                 efficient nor effective, and even not feasible for
                 large cloud clusters, especially when the workload may
                 dynamically change on each instance. The difficulty
                 lies in the fact that each database instance requires a
                 different buffer size that is highly individualized,
                 subject to the constraint of the total buffer memory
                 space. It is imperative to resort to algorithms that
                 automatically orchestrate the buffer pool tuning for
                 the entire database instances. To this end, we design
                  iBTune that has been deployed for more than 10,000
                 OLTP cloud database instances in our production system.
                 Specifically, it leverages the information from similar
                 workloads to find out the tolerable miss ratio of each
                 instance. Then, it utilizes the relationship between
                 miss ratios and allocated memory sizes to individually
                 optimize the target buffer pool sizes. To provide a
                 guaranteed level of service level agreement (SLA), we
                 design a pairwise deep neural network that uses
                 features from measurements on pairs of instances to
                 predict the upper bounds of the request response times.
                 A target buffer pool size can be adjusted only when the
                 predicted response time upper bound is in a safe limit.
                 The successful deployment on a production environment,
                 which safely reduces the memory footprint by more than
                 17\% compared to the original system that relies on
                 manual configurations, demonstrates the effectiveness
                 of our solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Whittaker:2019:OTI,
  author =       "Michael Whittaker and Nick Edmonds and Sandeep Tata
                 and James B. Wendt and Marc Najork",
  title =        "Online template induction for machine-generated
                 emails",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1235--1248",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342264",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In emails, information abounds. Whether it be a bill
                 reminder, a hotel confirmation, or a shipping
                 notification, our emails contain useful bits of
                 information that enable a number of applications. Most
                 of this email traffic is machine-generated, sent from a
                 business to a human. These business-to-consumer emails
                 are typically instantiated from a set of email
                 templates, and discovering these templates is a key
                 step in enabling a variety of intelligent experiences.
                 Existing email information extraction systems typically
                 separate information extraction into two steps: an
                 offline template discovery process (called template
                 induction) that is periodically run on a sample of
                 emails, and an online email annotation process that
                 applies discovered templates to emails as they arrive.
                 Since information extraction requires an email's
                 template to be known, any delay in discovering a newly
                 created template causes missed extractions, lowering
                 the overall extraction coverage. In this paper, we
                 present a novel system called Crusher that discovers
                 templates completely online, reducing template
                 discovery delay from a week (for the existing
                 MapReduce-based batch system) to minutes. Furthermore,
                 Crusher has a resource consumption footprint that is
                 significantly smaller than the existing batch system.
                 We also report on the surprising lesson we learned that
                 conventional stream processing systems do not present a
                 good framework on which to build Crusher. Crusher
                 delivers an order of magnitude more throughput than a
                 prototype built using a stream processing engine. We
                 hope that these lessons help designers of stream
                 processing systems accommodate a broader range of
                 applications like online template induction in the
                 future.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2019:QSP,
  author =       "Yong Wang and Guoliang Li and Nan Tang",
  title =        "Querying shortest paths on time dependent road
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1249--1261",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342265",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "For real-world time dependent road networks (TDRNs),
                 answering shortest path-based route queries and plans
                 in real-time is highly desirable by many industrial
                  applications. Unfortunately, traditional (Dijkstra-
                  or A*-like) algorithms are computationally expensive
                 for such tasks on TDRNs. Naturally, indexes are needed
                 to meet the real-time constraint required by real
                 applications. In this paper, we propose a novel
                 height-balanced tree-structured index, called
                 TD-G-tree, which supports fast route queries over
                 TDRNs. The key idea is to use hierarchical graph
                 partitioning to split a road network into hierarchical
                 partitions. This will produce a balanced tree, where
                 each tree node corresponds to a partition and each
                 parent-child relationship corresponds to a partition
                 and its sub-partition. We then compute and index time
                  dependent shortest paths (TDSPs) only for borders
                  (i.e., vertices whose edges are cut by a partition).
                 Based on TD-G-tree, we devise efficient algorithms to
                 support TDSP queries, as well as time-interval based
                 route planning, for computing optimal solutions through
                 dynamic programming and chronological
                 divide-and-conquer. Extensive experiments on real-world
                 datasets show that our method significantly outperforms
                 existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fariha:2019:EDQ,
  author =       "Anna Fariha and Alexandra Meliou",
  title =        "Example-driven query intent discovery: abductive
                 reasoning using semantic similarity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1262--1275",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342266",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Traditional relational data interfaces require precise
                 structured queries over potentially complex schemas.
                 These rigid data retrieval mechanisms pose hurdles for
                 non-expert users, who typically lack language expertise
                 and are unfamiliar with the details of the schema.
                 Query by Example (QBE) methods offer an alternative
                 mechanism: users provide examples of their intended
                 query output and the QBE system needs to infer the
                 intended query. However, these approaches focus on the
                 structural similarity of the examples and ignore the
                 richer context present in the data. As a result, they
                 typically produce queries that are too general, and
                 fail to capture the user's intent effectively. In this
                 paper, we present SQuID, a system that performs
                 semantic similarity-aware query intent discovery. Our
                 work makes the following contributions: (1) We design
                 an end-to-end system that automatically formulates
                 select-project-join queries in an open-world setting,
                 with optional group-by aggregation and intersection
                 operators; a much larger class than prior QBE
                 techniques. (2) We express the problem of query intent
                 discovery using a probabilistic abduction model, that
                 infers a query as the most likely explanation of the
                 provided examples. (3) We introduce the notion of an
                 abduction-ready database, which precomputes semantic
                 properties and related statistics, allowing SQuID to
                 achieve real-time performance. (4) We present an
                 extensive empirical evaluation on three real-world
                 datasets, including user-intent case studies,
                 demonstrating that SQuID is efficient and effective,
                 and outperforms machine learning methods, as well as
                 the state-of-the-art in the related query reverse
                 engineering problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2019:AVQ,
  author =       "Qi Zhou and Joy Arulraj and Shamkant Navathe and
                 William Harris and Dong Xu",
  title =        "Automated verification of query equivalence using
                 satisfiability modulo theories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1276--1288",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342267",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database-as-a-service offerings enable users to
                 quickly create and deploy complex data processing
                 pipelines. In practice, these pipelines often exhibit
                 significant overlap of computation due to redundant
                 execution of certain sub-queries. It is challenging for
                 developers and database administrators to manually
                 detect overlap across queries since they may be
                 distributed across teams, organization roles, and
                 geographic locations. Thus, we require automated
                 cloud-scale tools for identifying equivalent queries to
                 minimize computation overlap. State-of-the-art
                 algebraic approaches to automated verification of query
                 equivalence suffer from two limitations. First, they
                 are unable to model the semantics of widely-used SQL
                 features, such as complex query predicates and
                 three-valued logic. Second, they have a computationally
                 intensive verification procedure. These limitations
                 restrict their efficacy and efficiency in cloud-scale
                 database-as-a-service offerings. This paper makes the
                 case for an alternate approach to determining query
                 equivalence based on symbolic representation. The key
                 idea is to effectively transform a wide range of SQL
                 queries into first order logic formulae and then use
                 satisfiability modulo theories to efficiently verify
                 their equivalence. We have implemented this symbolic
                 representation-based approach in EQUITAS. Our
                 evaluation shows that EQUITAS proves the semantic
                 equivalence of a larger set of query pairs compared to
                 algebraic approaches and reduces the verification time
                 by 27X. We also demonstrate that on a set of 17,461
                 real-world SQL queries, it automatically identifies
                 redundant execution across 11\% of the queries. Our
                 symbolic-representation based technique is currently
                 deployed on Alibaba's MaxCompute database-as-a-service
                 platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2019:TUF,
  author =       "Pengfei Xu and Jiaheng Lu",
  title =        "Towards a unified framework for string similarity
                 joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1289--1302",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342268",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A similarity join aims to find all similar pairs
                 between two collections of records. Established
                 algorithms utilise different similarity measures,
                 either syntactic or semantic, to quantify the
                 similarity between two records. However, when records
                 are similar in forms of a mixture of syntactic and
                 semantic relations, utilising a single measure becomes
                 inadequate to disclose the real similarity between
                 records, and hence unable to obtain high-quality join
                 results. In this paper, we study a unified framework to
                 find similar records by combining multiple similarity
                 measures. To achieve this goal, we first develop a new
                 similarity framework that unifies the existing three
                 kinds of similarity measures simultaneously, including
                 syntactic (typographic) similarity, synonym-based
                 similarity, and taxonomy-based similarity. We then
                 theoretically prove that finding the maximum unified
                  similarity between two strings is generally NP-hard,
                 and furthermore develop an approximate algorithm which
                 runs in polynomial time with a non-trivial
                 approximation guarantee. To support efficient string
                 joins based on our unified similarity measure, we adopt
                 the filter-and-verification framework and propose a new
                 signature structure, called pebble, which can be
                 simultaneously adapted to handle multiple similarity
                 measures. The salient feature of our approach is that,
                 it can judiciously select the best pebble signatures
                 and the overlap thresholds to maximise the filtering
                 power. Extensive experiments show that our methods are
                 capable of finding similar records having mixed types
                 of similarity relations, while exhibiting high
                 efficiency and scalability for similarity joins. The
                 implementation can be downloaded at
                 https://github.com/HY-UDBMS/AU-Join.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yoon:2019:NEF,
  author =       "Susik Yoon and Jae-Gil Lee and Byung Suk Lee",
  title =        "{NETS}: extremely fast outlier detection from a data
                 stream via set-based processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1303--1315",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342269",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "This paper addresses the problem of efficiently
                 detecting outliers from a data stream as old data
                 points expire from and new data points enter the window
                 incrementally. The proposed method is based on a newly
                 discovered characteristic of a data stream that the
                 change in the locations of data points in the data
                 space is typically very insignificant. This observation
                 has led to the finding that the existing distance-based
                 outlier detection algorithms perform excessive
                 unnecessary computations that are repetitive and/or
                 canceling out the effects. Thus, in this paper, we
                 propose a novel set-based approach to detecting
                 outliers, whereby data points at similar locations are
                 grouped and the detection of outliers or inliers is
                 handled at the group level. Specifically, a new
                 algorithm NETS is proposed to achieve a remarkable
                 performance improvement by realizing set-based early
                 identification of outliers or inliers and taking
                 advantage of the ``net effect'' between expired and new
                 data points. Additionally, NETS is capable of achieving
                 the same efficiency even for a high-dimensional data
                 stream through two-level dimensional filtering.
                 Comprehensive experiments using six real-world data
                 streams show 5 to 25 times faster processing time than
                 state-of-the-art algorithms with comparable memory
                 consumption. We assert that NETS opens a new
                 possibility to real-time data stream outlier
                 detection.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2019:SST,
  author =       "Yi Lu and Xiangyao Yu and Samuel Madden",
  title =        "{STAR}: scaling transactions through asymmetric
                 replication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1316--1329",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342270",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we present STAR, a new distributed
                 in-memory database with asymmetric replication. By
                 employing a single-node non-partitioned architecture
                 for some replicas and a partitioned architecture for
                 other replicas, STAR is able to efficiently run both
                 highly partitionable workloads and workloads that
                 involve cross-partition transactions. The key idea is a
                 new phase-switching algorithm where the execution of
                 single-partition and cross-partition transactions is
                 separated. In the partitioned phase, single-partition
                 transactions are run on multiple machines in parallel
                 to exploit more concurrency. In the single-master
                 phase, mastership for the entire database is switched
                 to a single designated master node, which can execute
                 these transactions without the use of expensive
                 coordination protocols like two-phase commit. Because
                 the master node has a full copy of the database, this
                 phase-switching can be done at negligible cost. Our
                 experiments on two popular benchmarks (YCSB and TPC-C)
                 show that high availability via replication can coexist
                 with fast serializable transaction execution in
                 distributed in-memory databases, with STAR
                 outperforming systems that employ conventional
                 concurrency control and replication algorithms by up to
                 one order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2019:SD,
  author =       "Yuliang Li and Aaron Feng and Jinfeng Li and Saran
                 Mumick and Alon Halevy and Vivian Li and Wang-Chiew
                 Tan",
  title =        "Subjective databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1330--1343",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342271",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Online users are constantly seeking experiences, such
                 as a hotel with clean rooms and a lively bar, or a
                 restaurant for a romantic rendezvous. However,
                 e-commerce search engines only support queries
                 involving objective attributes such as location, price,
                 and cuisine, and any experiential data is relegated to
                 text reviews. In order to support experiential queries,
                 a database system needs to model subjective data. Users
                 should be able to pose queries that specify subjective
                 experiences using their own words, in addition to
                 conditions on the usual objective attributes. This
                 paper introduces OpineDB, a subjective database system
                 that addresses these challenges. We introduce a data
                 model for subjective databases. We describe how OpineDB
                 translates subjective queries against the subjective
                 database schema, which is done by matching the user
                 query phrases to the underlying schema. We also show
                 how the experiential conditions specified by the user
                 can be combined and the results aggregated and ranked.
                 We demonstrate that subjective databases satisfy user
                 needs more effectively and accurately than alternative
                 techniques through experiments with real data of hotel
                 and restaurant reviews.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2019:FRD,
  author =       "Xuguang Ren and Junhu Wang and Wook-Shin Han and
                 Jeffrey Xu Yu",
  title =        "Fast and robust distributed subgraph enumeration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1344--1356",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342272",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the subgraph enumeration problem under
                 distributed settings. Existing solutions either suffer
                 from severe memory crisis or rely on large indexes,
                 which makes them impractical for very large graphs.
                 Most of them follow a synchronous model where the
                 performance is often bottlenecked by the machine with
                 the worst performance. Motivated by this, in this
                 paper, we propose RADS, a Robust Asynchronous
                 Distributed Subgraph enumeration system. RADS first
                 identifies results that can be found using
                 single-machine algorithms. This strategy not only
                 improves the overall performance but also reduces
                 network communication and memory cost. Moreover, RADS
                 employs a novel region-grouped multi-round expand
                 verify \& filter framework which does not need to
                 shuffle and exchange the intermediate results, nor does
                 it need to replicate a large part of the data graph in
                 each machine. This feature not only reduces network
                 communication cost and memory usage, but also allows us
                 to adopt simple strategies for memory control and load
                 balancing, making it more robust. Several optimization
                 strategies are also used in RADS to further improve the
                 performance. Our experiments verified the superiority
                 of RADS to state-of-the-art subgraph enumeration
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fu:2019:EEL,
  author =       "Fangcheng Fu and Jiawei Jiang and Yingxia Shao and Bin
                 Cui",
  title =        "An experimental evaluation of large scale {GBDT}
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1357--1370",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342273",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Gradient boosting decision tree (GBDT) is a
                 widely-used machine learning algorithm in both data
                 analytic competitions and real-world industrial
                 applications. Further, driven by the rapid increase in
                 data volume, efforts have been made to train GBDT in a
                 distributed setting to support large-scale workloads.
                 However, we find it surprising that the existing
                 systems manage the training dataset in different ways,
                 but none of them have studied the impact of data
                 management. To that end, this paper aims to study the
                 pros and cons of different data management methods
                 regarding the performance of distributed GBDT. We first
                 introduce a quadrant categorization of data management
                 policies based on data partitioning and data storage.
                 Then we conduct an in-depth systematic analysis and
                 summarize the advantageous scenarios of the quadrants.
                 Based on the analysis, we further propose a novel
                 distributed GBDT system named Vero, which adopts the
                 unexplored composition of vertical partitioning and
                 row-store and suits for many large-scale cases. To
                 validate our analysis empirically, we implement
                 different quadrants in the same code base and compare
                 them under extensive workloads, and finally compare
                 Vero with other state-of-the-art systems over a wide
                 range of datasets. Our theoretical and experimental
                 results provide a guideline on choosing a proper data
                 management policy for a given workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kotsogiannis:2019:PDP,
  author =       "Ios Kotsogiannis and Yuchao Tao and Xi He and Maryam
                 Fanaeepour and Ashwin Machanavajjhala and Michael Hay
                 and Gerome Miklau",
  title =        "{PrivateSQL}: a differentially private {SQL} query
                 engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1371--1384",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342274",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differential privacy is considered a de facto standard
                 for private data analysis. However, the definition and
                 much of the supporting literature applies to flat
                 tables. While there exist variants of the definition
                 and specialized algorithms for specific types of
                 relational data (e.g. graphs), there isn't a general
                 privacy definition for multi-relational schemas with
                 constraints, and no system that permits accurate
                 differentially private answering of SQL queries while
                 imposing a fixed privacy budget across all queries
                 posed by the analyst. This work presents PrivateSQL, a
                 first-of-its-kind end-to-end differentially private
                 relational database system. PrivateSQL allows an
                 analyst to query data stored in a standard database
                 management system using a rich class of SQL counting
                 queries. PrivateSQL adopts a novel generalization of
                 differential privacy to multi-relational data that
                 takes into account constraints in the schema like
                 foreign keys, and allows the data owner to flexibly
                 specify entities in the schema that need privacy.
                 PrivateSQL ensures a fixed privacy loss across all the
                 queries posed by the analyst by answering queries on
                 private synopses generated from several views over the
                 base relation that are tuned to have low error on a
                 representative query workload. We experimentally
                 evaluate PrivateSQL on a real-world dataset and a
                 workload of more than 3,600 queries. We show that for
                 50\% of the queries PrivateSQL offers at least 1,000x
                 better error rates than solutions adapted from prior
                 work.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amiri:2019:CCA,
  author =       "Mohammad Javad Amiri and Divyakant Agrawal and Amr {El
                 Abbadi}",
  title =        "{CAPER}: a cross-application permissioned blockchain",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1385--1398",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342275",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Despite recent intensive research, existing blockchain
                 systems do not adequately address all the
                 characteristics of distributed applications. In
                 particular, distributed applications collaborate with
                 each other following service level agreements (SLAs) to
                 provide different services. While collaboration between
                 applications, e.g., cross-application transactions,
                 should be visible to all applications, the internal
                 data of each application, e.g., internal transactions,
                 might be confidential. In this paper, we introduce
                 CAPER, a permissioned blockchain system to support both
                 internal and cross-application transactions of
                 collaborating distributed applications. In CAPER, the
                 blockchain ledger is formed as a directed acyclic graph
                 where each application accesses and maintains only its
                 own view of the ledger including its internal and all
                 cross-application transactions. CAPER also introduces
                 three consensus protocols to globally order
                 cross-application transactions between applications
                 with different internal consensus protocols. The
                 experimental results reveal the efficiency of CAPER in
                 terms of performance and scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Koliousis:2019:CSD,
  author =       "Alexandros Koliousis and Pijika Watcharapichat and
                 Matthias Weidlich and Luo Mai and Paolo Costa and Peter
                 Pietzuch",
  title =        "{Crossbow}: scaling deep learning with small batch
                 sizes on multi-{GPU} servers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1399--1412",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342276",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Deep learning models are trained on servers with many
                 GPUs, and training must scale with the number of GPUs.
                 Systems such as TensorFlow and Caffe2 train models with
                 parallel synchronous stochastic gradient descent: they
                 process a batch of training data at a time, partitioned
                 across GPUs, and average the resulting partial
                 gradients to obtain an updated global model. To fully
                 utilise all GPUs, systems must increase the batch size,
                 which hinders statistical efficiency. Users tune
                 hyper-parameters such as the learning rate to
                 compensate for this, which is complex and
                 model-specific. We describe Crossbow, a new
                 single-server multi-GPU system for training deep
                 learning models that enables users to freely choose
                 their preferred batch size---however small---while
                 scaling to multiple GPUs. Crossbow uses many parallel
                 model replicas and avoids reduced statistical
                 efficiency through a new synchronous training method.
                 We introduce SMA, a synchronous variant of model
                 averaging in which replicas independently explore the
                 solution space with gradient descent, but adjust their
                 search synchronously based on the trajectory of a
                 globally-consistent average model. Crossbow achieves
                 high hardware efficiency with small batch sizes by
                 potentially training multiple model replicas per GPU,
                 automatically tuning the number of replicas to maximise
                 throughput. Our experiments show that Crossbow improves
                 the training time of deep learning models on an 8-GPU
                 server by 1.3--4X compared to TensorFlow.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Feng:2019:FAA,
  author =       "Kaiyu Feng and Gao Cong and Christian S. Jensen and
                 Tao Guo",
  title =        "Finding attribute-aware similar regions for data
                 analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1414--1426",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342277",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the proliferation of mobile devices and
                 location-based services, increasingly massive volumes
                 of geo-tagged data are becoming available. This data
                 typically also contains non-location information. We
                 study how to use such information to characterize a
                 region and then how to find a region of the same size
                 and with the most similar characteristics. This
                 functionality enables a user to identify regions that
                 share characteristics with a user-supplied region that
                 the user is familiar with and likes. More specifically,
                 we formalize and study a new problem called the
                 attribute-aware similar region search (ASRS) problem.
                 We first define so-called composite aggregators that
                 are able to express aspects of interest in terms of the
                 information associated with a user-supplied region.
                 When applied to a region, an aggregator captures the
                 region's relevant characteristics. Next, given a query
                 region and a composite aggregator, we propose a novel
                 algorithm called DS-Search to find the most similar
                 region of the same size. Unlike any previous work on
                 region search, DS-Search repeatedly discretizes and
                 splits regions until a split region either satisfies a
                 drop condition or it is guaranteed to not contribute to
                 the result. In addition, we extend DS-Search to solve
                 the ASRS problem approximately. Finally, we report on
                 extensive empirical studies that offer insight into the
                 efficiency and effectiveness of the paper's
                 proposals.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tang:2019:IQP,
  author =       "Dixin Tang and Zechao Shang and Aaron J. Elmore and
                 Sanjay Krishnan and Michael J. Franklin",
  title =        "Intermittent query processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1427--1441",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342278",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many applications ingest data in an intermittent, yet
                 largely predictable, pattern. Existing systems tend to
                 ignore how data arrives when making decisions about how
                 to update (or refresh) an ongoing query. To address
                 this shortcoming we propose a new query processing
                 paradigm, Intermittent Query Processing (IQP), that
                 bridges query execution and policies, to determine when
                 to update results and how much resources to allocate
                 for ensuring fast query updates. Here, for a query the
                 system provides an initial result that is to be
                 refreshed when policy dictates, such as after a defined
                 number of new records arrive or a time interval
                 elapses. In between intermittent data arrivals, IQP
                 inactivates query execution by selectively releasing
                 some resources occupied in normal execution that will
                 be least helpful (for future refreshes) according to
                 the arrival patterns for new records. We present an IQP
                 prototype based on PostgreSQL that selectively persists
                 the state associated with query operators to allow for
                 fast query updates while constraining resource
                 consumption. Our experiments show that for several
                 application scenarios IQP greatly lowers query
                 processing latency compared to batch systems, and
                 largely reduces memory consumption with comparable
                 latency compared to a state-of-the-art incremental view
                 maintenance technique.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Budiu:2019:HTC,
  author =       "Mihai Budiu and Parikshit Gopalan and Lalith Suresh
                 and Udi Wieder and Han Kruiger and Marcos K. Aguilera",
  title =        "{Hillview}: a trillion-cell spreadsheet for big data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1442--1457",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342279",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Hillview is a distributed spreadsheet for browsing
                 very large datasets that cannot be handled by a single
                 machine. As a spreadsheet, Hillview provides a high
                 degree of interactivity that permits data analysts to
                 explore information quickly along many dimensions while
                 switching visualizations on a whim. To provide the
                 required responsiveness, Hillview introduces
                 visualization sketches, or vizketches, as a simple idea
                 to produce compact data visualizations. Vizketches
                 combine algorithmic techniques for data summarization
                 with computer graphics principles for efficient
                 rendering. While simple, vizketches are effective at
                 scaling the spreadsheet by parallelizing computation,
                 reducing communication, providing progressive
                 visualizations, and offering precise accuracy
                 guarantees. Using Hillview running on eight servers, we
                 can navigate and visualize datasets of tens of billions
                 of rows and trillions of cells, much beyond the
                 published capabilities of competing systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wei:2019:EFD,
  author =       "Ziheng Wei and Sebastian Link",
  title =        "Embedded functional dependencies and data-completeness
                 tailored database design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1458--1470",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342626",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We establish a robust schema design framework for data
                 with missing values. The framework is based on the new
                 notion of an embedded functional dependency, which is
                 independent of the interpretation of missing values,
                 able to express completeness and integrity requirements
                 on application data, and capable of capturing many
                 redundant data value occurrences. We establish
                 axiomatic and algorithmic foundations for reasoning
                 about embedded functional dependencies. These
                 foundations allow us to establish generalizations of
                 Boyce-Codd and Third normal forms that do not permit
                 any redundancy in any future application data, or
                 minimize their redundancy across dependency-preserving
                 decompositions, respectively. We show how to transform
                 any given schema into application schemata that meet
                 given completeness and integrity requirements and the
                 conditions of the generalized normal forms. Data over
                 those application schemata are therefore fit for
                 purpose by design. Extensive experiments with benchmark
                 schemata and data illustrate our framework, and the
                 effectiveness and efficiency of our algorithms, but
                 also provide quantified insight into database schema
                 design trade-offs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fan:2019:OVG,
  author =       "Hua Fan and Wojciech Golab",
  title =        "{Ocean Vista}: gossip-based visibility control for
                 speedy geo-distributed transactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1471--1484",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342627",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Providing ACID transactions under conflicts across
                 globally distributed data is the Everest of transaction
                 processing protocols. Transaction processing in this
                 scenario is particularly costly due to the high latency
                 of cross-continent network links, which inflates
                 concurrency control and data replication overheads. To
                 mitigate the problem, we introduce Ocean Vista --- a
                 novel distributed protocol that guarantees strict
                 serializability. We observe that concurrency control
                 and replication address different aspects of resolving
                 the visibility of transactions, and we address both
                 concerns using a multi-version protocol that tracks
                 visibility using version watermarks and arrives at
                 correct visibility decisions using efficient gossip.
                 Gossiping the watermarks enables asynchronous
                 transaction processing and acknowledging transaction
                 visibility in batches in the concurrency control and
                 replication protocols, which improves efficiency under
                 high cross-datacenter network delays. In particular,
                 Ocean Vista can process conflicting transactions in
                 parallel, and supports efficient write-quorum /
                 read-one access using one round trip in the common
                 case. We demonstrate experimentally in a
                 multi-data-center cloud environment that our design
                 outperforms a leading distributed transaction
                 processing engine (TAPIR) more than 10-fold in terms of
                 peak throughput, albeit at the cost of additional
                 latency for gossip. The latency penalty is generally
                 bounded by one wide area network (WAN) round trip time
                 (RTT), and in the best case (i.e., under light load)
                 our system nearly breaks even with TAPIR by committing
                 transactions in around one WAN RTT.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2019:INF,
  author =       "Xikui Wang and Michael J. Carey",
  title =        "An {IDEA}: an ingestion framework for data enrichment
                 in {AsterixDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1485--1498",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342628",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big Data today is being generated at an unprecedented
                 rate from various sources such as sensors,
                 applications, and devices, and it often needs to be
                 enriched based on other reference information to
                 support complex analytical queries. Depending on the
                 use case, the enrichment operations can be compiled
                 code, declarative queries, or machine learning models
                 with different complexities. For enrichments that will
                 be frequently used in the future, it can be
                 advantageous to push their computation into the
                 ingestion pipeline so that they can be stored (and
                 queried) together with the data. In some cases, the
                 referenced information may change over time, so the
                 ingestion pipeline should be able to adapt to such
                 changes to guarantee the currency and/or correctness of
                 the enrichment results. In this paper, we present a new
                 data ingestion framework that supports data ingestion
                 at scale, enrichments requiring complex operations, and
                 adaptiveness to reference data changes. We explain how
                 this framework has been built on top of Apache
                 AsterixDB and investigate its performance at scale
                 under various workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Karyakin:2019:DMP,
  author =       "Alexey Karyakin and Kenneth Salem",
  title =        "{DimmStore}: memory power optimization for database
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1499--1512",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342629",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Memory can consume a substantial amount of power in
                 database servers, yet memory power has received
                 considerably less attention than CPU power. Memory
                 power consumption is also highly non-proportional.
                 Thus, memory power becomes even more significant in the
                 common case in which a database server is either not
                 completely busy or not completely full. In this paper,
                 we study the application of two memory power
                 optimization techniques --- rank-aware allocation and
                 rate-based layout --- to database systems. By
                 concentrating memory load, rather than spreading it out
                 evenly, these techniques create and exploit memory
                 idleness to achieve power savings. We have implemented
                 these techniques in a prototype database system called
                 DimmStore. DimmStore is part of a memory power testbed
                 which includes customized hardware with direct power
                 measurement capabilities, allowing us to measure the
                 techniques' effectiveness. We use the testbed to
                 empirically characterize the power saving opportunities
                 provided by these techniques, as well as their
                 performance impact, under YCSB and TPC-C workloads.
                 Under simple YCSB workloads, power savings ranged up to
                 50\%, depending on load and space utilization, with
                 little performance impact. Savings were smaller, but
                 still significant, for TPC-C, which has more complex
                 data locality characteristics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yan:2019:GAS,
  author =       "Cong Yan and Alvin Cheung",
  title =        "Generating application-specific data layouts for
                 in-memory databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1513--1525",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342630",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database applications are often developed with
                 object-oriented languages while using relational
                 databases as the backend. To accelerate these
                 applications, developers would manually design
                 customized data structures to store data in memory, and
                 ways to utilize such data structures to answer queries.
                 Doing so is brittle and requires a lot of effort.
                 Alternatively, developers might automate the process by
                 using relational physical design tools to create
                 materialized views and indexes instead. However, the
                 characteristics of object-oriented database
                 applications are often distinct enough from traditional
                 database applications such that classical relational
                 query optimization techniques often cannot speed up
                 queries that arise from such applications, as our
                 experiments show. To address this, we build Chestnut, a
                 data layout generator for in-memory object-oriented
                 database applications. Given a memory budget, Chestnut
                 generates customized in-memory data layouts and query
                 plans to answer queries written using a subset of the
                 Rails API, a common framework for building
                 object-oriented database applications. Chestnut differs
                 from traditional query optimizers and physical
                 designers in two ways. First, Chestnut automatically
                 generates data layouts that are customized for the
                 application after analyzing their queries, hence
                 Chestnut-generated data layouts are designed to be
                 efficient to answer queries from such applications.
                 Second, Chestnut uses a novel enumeration and
                 verification-based algorithm to generate query plans
                 that use such data layouts, rather than rule-based
                 approaches as in traditional query optimizers. We
                 evaluated Chestnut on four open-source Rails database
                 applications. The result shows that it can reduce
                 average query processing time by over 3.6X (and up to
                 42X), as compared to other in-memory relational
                 database engines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hai:2019:RPT,
  author =       "Rihan Hai and Christoph Quix",
  title =        "Rewriting of plain {SO} tgds into nested tgds",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1526--1538",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342631",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Schema mappings express the relationships between
                 sources in data interoperability scenarios and can be
                 expressed in various formalisms. Source-to-target
                 tuple-generating dependencies (s-t tgds) can be easily
                 used for data transformation or query rewriting tasks.
                 Second-order tgds (SO tgds) are more expressive as they
                 can also represent the composition and inversion of s-t
                 tgds. Yet, the expressive power of SO tgds comes with
                 the problem of undecidability for some reasoning tasks.
                 Nested tgds and plain SO tgds are mapping languages
                 that are between s-t tgds and SO tgds in terms of
                 expressivity, and their properties have been studied in
                 the recent years. Nested tgds are less expressive than
                 plain SO tgds, but the logical equivalence problem for
                 nested tgds is decidable. However, a detailed
                 characterization of plain SO tgds that have an
                 equivalent nested tgd is missing. In this paper, we
                 present an algorithmic solution for translating plain
                 SO tgds into nested tgds. The algorithm computes one or
                 more nested tgds, if a given plain SO tgd is
                 rewritable. Furthermore, we are able to give a detailed
                 characterization of those plain SO tgds for which an
                 equivalent nested tgd exists, based on the structural
                 properties of the source predicates and Skolem
                 functions in the plain SO tgd. In the evaluation, we
                 show that our algorithm covers a larger subset of plain
                 SO tgds than previous approaches and that a rewriting
                 can be computed efficiently although the algorithm has
                 the exponential complexity.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nathan:2019:BMD,
  author =       "Senthil Nathan and Chander Govindarajan and Adarsh
                 Saraf and Manish Sethi and Praveen Jayachandran",
  title =        "Blockchain meets database: design and implementation
                 of a blockchain relational database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1539--1552",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342632",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we design and implement the first-ever
                 decentralized replicated relational database with
                 blockchain properties that we term blockchain
                 relational database. We highlight several similarities
                 between features provided by blockchain platforms and a
                 replicated relational database, although they are
                 conceptually different, primarily in their trust model.
                 Motivated by this, we leverage the rich features,
                 decades of research and optimization, and available
                 tooling in relational databases to build a blockchain
                 relational database. We consider a permissioned
                 blockchain model of known, but mutually distrustful
                 organizations each operating their own database
                 instance that are replicas of one another. The replicas
                 execute transactions independently and engage in
                 decentralized consensus to determine the commit order
                 for transactions. We design two approaches, the first
                 where the commit order for transactions is agreed upon
                 prior to executing them, and the second where
                 transactions are executed without prior knowledge of
                 the commit order while the ordering happens in
                 parallel. We leverage serializable snapshot isolation
                 (SSI) to guarantee that the replicas across nodes
                 remain consistent and respect the ordering determined
                 by consensus, and devise a new variant of SSI based on
                 block height for the latter approach. We implement our
                 system on PostgreSQL and present detailed performance
                 experiments analyzing both approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kunft:2019:IRO,
  author =       "Andreas Kunft and Asterios Katsifodimos and Sebastian
                 Schelter and Sebastian Bre{\ss} and Tilmann Rabl and
                 Volker Markl",
  title =        "An intermediate representation for optimizing machine
                 learning pipelines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1553--1567",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342633",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine learning (ML) pipelines for model training and
                 validation typically include preprocessing, such as
                 data cleaning and feature engineering, prior to
                 training an ML model. Preprocessing combines relational
                 algebra and user-defined functions (UDFs), while model
                 training uses iterations and linear algebra. Current
                 systems are tailored to either of the two. As a
                 consequence, preprocessing and ML steps are optimized
                 in isolation. To enable holistic optimization of ML
                 training pipelines, we present Lara, a declarative
                 domain-specific language for collections and matrices.
                 Lara's inter-mediate representation (IR) reflects on
                 the complete program, i.e., UDFs, control flow, and
                 both data types. Two views on the IR enable diverse
                 optimizations. Monads enable operator pushdown and
                 fusion across type and loop boundaries. Combinators
                 provide the semantics of domain-specific operators and
                 optimize data access and cross-validation of ML
                 algorithms. Our experiments on preprocessing pipelines
                 and selected ML algorithms show the effects of our
                 proposed optimizations on dense and sparse data, which
                 achieve speedups of up to an order of magnitude.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2019:ARD,
  author =       "Yuanwei Fang and Chen Zou and Andrew A. Chien",
  title =        "Accelerating raw data analysis with the {ACCORDA}
                 software and hardware architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1568--1582",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342634",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The data science revolution and growing popularity of
                 data lakes make efficient processing of raw data
                 increasingly important. To address this, we propose the
                 ACCelerated Operators for Raw Data Analysis (ACCORDA)
                 architecture. By extending the operator interface
                 (subtype with encoding) and employing a uniform runtime
                 worker model, ACCORDA integrates data transformation
                 acceleration seamlessly, enabling a new class of
                 encoding optimizations and robust high-performance raw
                 data processing. Together, these key features preserve
                 the software system architecture, empowering
                 state-of-art heuristic optimizations to drive flexible
                 data encoding for performance. ACCORDA derives
                 performance from its software architecture, but depends
                 critically on the acceleration of the Unstructured Data
                 Processor (UDP) that is integrated into the
                 memory-hierarchy, and accelerates data transformation
                 tasks by 16x-21x (parsing, decompression) to as much as
                 160x (deserialization) compared to an x86 core. We
                 evaluate ACCORDA using TPC-H queries on tabular data
                 formats, exercising raw data properties such as parsing
                 and data conversion. The ACCORDA system achieves
                 2.9x-13.2x speedups when compared to SparkSQL, reducing
                 raw data processing overhead to a geomean of 1.2x
                 (20\%). In doing so, ACCORDA robustly matches or
                 outperforms prior systems that depend on caching loaded
                 data, while computing on raw, unloaded data. This
                 performance benefit is robust across format complexity,
                 query predicates, and selectivity (data statistics).
                 ACCORDA's encoding-extended operator interface unlocks
                 aggressive encoding-oriented optimizations that deliver
                 80\% average performance increase over the 7 affected
                 TPC-H queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Siddique:2019:CST,
  author =       "A. B. Siddique and Ahmed Eldawy and Vagelis
                 Hristidis",
  title =        "Comparing synopsis techniques for approximate spatial
                 data analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1583--1596",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342635",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The increasing amount of spatial data calls for new
                 scalable query processing techniques. One of the
                 techniques that are getting attention is data synopsis,
                 which summarizes the data using samples or histograms
                 and computes an approximate answer based on the
                 synopsis. This general technique is used in selectivity
                 estimation, clustering, partitioning, load balancing,
                 and visualization, among others. This paper
                 experimentally studies four spatial data synopsis
                 techniques for three common data analysis problems,
                 namely, selectivity estimation, k-means clustering, and
                 spatial partitioning. We run an extensive experimental
                 evaluation on both real and synthetic datasets of up to
                 2.7 billion records to study the trade-offs between the
                 synopsis methods and their applicability in big spatial
                 data analysis. For each of the three problems, we
                 compare with baseline techniques that operate on the
                 whole dataset and evaluate the synopsis generation
                 time, the time for computing an approximate answer on
                 the synopsis, and the accuracy of the result. We
                 present our observations about when each synopsis
                 technique performs best.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{El-Hindi:2019:BSD,
  author =       "Muhammad El-Hindi and Carsten Binnig and Arvind Arasu
                 and Donald Kossmann and Ravi Ramamurthy",
  title =        "{BlockchainDB}: a shared database on blockchains",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1597--1609",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342636",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper we present BlockchainDB, which leverages
                 blockchains as a storage layer and introduces a
                 database layer on top that extends blockchains by
                 classical data management techniques (e.g., sharding)
                 as well as a standardized query interface to facilitate
                 the adoption of blockchains for data sharing use cases.
                 We show that by introducing the additional database
                 layer, we are able to improve the performance and
                 scalability when using blockchains for data sharing and
                 also massively decrease the complexity for
                 organizations intending to use blockchains for data
                 sharing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jia:2019:ETS,
  author =       "Ruoxi Jia and David Dao and Boxin Wang and Frances Ann
                 Hubis and Nezihe Merve Gurel and Bo Li and Ce Zhang and
                 Costas Spanos and Dawn Song",
  title =        "Efficient task-specific data valuation for nearest
                 neighbor algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1610--1623",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342637",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a data set D containing millions of data points
                 and a data consumer who is willing to pay for \$X to
                 train a machine learning (ML) model over D, how should
                 we distribute this \$X to each data point to reflect
                 its ``value''? In this paper, we define the ``relative
                 value of data'' via the Shapley value, as it uniquely
                 possesses properties with appealing real-world
                 interpretations, such as fairness, rationality and
                 decentralizability. For general, bounded utility
                 functions, the Shapley value is known to be challenging
                 to compute: to get Shapley values for all N data
                 points, it requires O (2$^N$ ) model evaluations for
                 exact computation and O ( N log N ) for ( \epsilon,
                 \delta )-approximation. In this paper, we focus on one
                 popular family of ML models relying on K -nearest
                 neighbors ( K NN). The most surprising result is that
                 for unweighted K NN classifiers and regressors, the
                 Shapley value of all N data points can be computed,
                 exactly, in O ( N log N ) time --- an exponential
                 improvement on computational complexity! Moreover, for
                 ( \epsilon, \delta )-approximation, we are able to
                 develop an algorithm based on Locality Sensitive
                 Hashing (LSH) with only sublinear complexity O ( N$^{h
                 (\epsilon, K)}$ log N ) when \epsilon is not too small
                 and K is not too large. We empirically evaluate our
                 algorithms on up to 10 million data points and even our
                 exact algorithm is up to three orders of magnitude
                 faster than the baseline approximation algorithm. The
                 LSH-based approximation algorithm can accelerate the
                 value calculation process even further. We then extend
                 our algorithm to other scenarios such as (1) weighed K
                 NN classifiers, (2) different data points are clustered
                 by different data curators, and (3) there are data
                 analysts providing computation who also requires proper
                 valuation. Some of these extensions, although also
                 being improved exponentially, are less practical for
                 exact computation (e.g., O ( N$^K$ ) complexity for
                 weighted K NN). We thus propose a Monte Carlo
                 approximation algorithm, which is O ( N (log N )$^2$
                 /(log K )$^2$ ) times more efficient than the baseline
                 approximation algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Saxena:2019:DID,
  author =       "Hemant Saxena and Lukasz Golab and Ihab F. Ilyas",
  title =        "Distributed implementations of dependency discovery
                 algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1624--1636",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342638",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We analyze the problem of discovering dependencies
                 from distributed big data. Existing (non-distributed)
                 algorithms focus on minimizing computation by pruning
                 the search space of possible dependencies. However,
                 distributed algorithms must also optimize communication
                 costs, especially in shared-nothing settings, leading
                 to a more complex optimization space. To understand
                 this space, we introduce six primitives shared by
                 existing dependency discovery algorithms, corresponding
                 to data processing steps separated by communication
                 barriers. Through case studies, we show how the
                 primitives allow us to analyze the design space and
                 develop communication-optimized implementations.
                 Finally, we support our analysis with an experimental
                 evaluation on real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zamanian:2019:RDH,
  author =       "Erfan Zamanian and Xiangyao Yu and Michael Stonebraker
                 and Tim Kraska",
  title =        "Rethinking database high availability with {RDMA}
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1637--1650",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342639",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Highly available database systems rely on data
                 replication to tolerate machine failures. Both classes
                 of existing replication algorithms, active-passive and
                 active-active, were designed in a time when network was
                 the dominant performance bottleneck. In essence, these
                 techniques aim to minimize network communication
                 between replicas at the cost of incurring more
                 processing redundancy; a trade-off that suitably fitted
                 the conventional wisdom of distributed database design.
                 However, the emergence of next-generation networks with
                 high throughput and low latency calls for revisiting
                 these assumptions. In this paper, we first make the
                 case that in modern RDMA-enabled networks, the
                 bottleneck has shifted to CPUs, and therefore the
                 existing network-optimized replication techniques are
                 no longer optimal. We present Active-Memory
                 Replication, a new high availability scheme that
                 efficiently leverages RDMA to completely eliminate the
                 processing redundancy in replication. Using
                 Active-Memory, all replicas dedicate their processing
                 power to executing new transactions, as opposed to
                 performing redundant computation. Active-Memory
                 maintains high availability and correctness in the
                 presence of failures through an efficient RDMA-based
                 undo-logging scheme. Our evaluation against
                 active-passive and active-active schemes shows that
                 Active-Memory is up to a factor of 2 faster than the
                 second-best protocol on RDMA-based networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bressan:2019:MFM,
  author =       "Marco Bressan and Stefano Leucci and Alessandro
                 Panconesi",
  title =        "{Motivo}: fast motif counting via succinct color
                 coding and adaptive sampling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1651--1663",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342640",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The randomized technique of color coding is behind
                 state-of-the-art algorithms for estimating graph motif
                 counts. Those algorithms, however, are not yet capable
                 of scaling well to very large graphs with billions of
                 edges. In this paper we develop novel tools for the
                 ``motif counting via color coding'' framework. As a
                 result, our new algorithm, MOTIVO, scales to much
                 larger graphs while at the same time providing more
                 accurate motif counts than ever before. This is
                 achieved thanks to two types of improvements. First, we
                 design new succinct data structures for fast color
                 coding operations, and a biased coloring trick that
                 trades accuracy versus resource usage. These
                 optimizations drastically reduce the resource
                 requirements of color coding. Second, we develop an
                 adaptive motif sampling strategy, based on a fractional
                 set cover problem, that breaks the additive
                 approximation barrier of standard sampling. This gives
                 multiplicative approximations for all motifs at once,
                 allowing us to count not only the most frequent motifs
                 but also extremely rare ones. To give an idea of the
                 improvements, in 40 minutes MOTIVO counts 7-nodes
                 motifs on a graph with 65M nodes and 1.8B edges; this
                 is 30 and 500 times larger than the state of the art,
                 respectively in terms of nodes and edges. On the
                 accuracy side, in one hour MOTIVO produces accurate
                 counts of \approx 10.000 distinct 8-node motifs on
                 graphs where state-of-the-art algorithms fail even to
                 find the second most frequent motif. Our method
                 requires just a high-end desktop machine. These results
                 show how color coding can bring motif mining to the
                 realm of truly massive graphs using only ordinary
                 hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Poddar:2019:AED,
  author =       "Rishabh Poddar and Tobias Boelter and Raluca Ada
                 Popa",
  title =        "{Arx}: an encrypted database using semantically secure
                 encryption",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1664--1678",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342641",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In recent years, encrypted databases have emerged as a
                 promising direction that provides data confidentiality
                 without sacrificing functionality: queries are executed
                 on encrypted data. However, many practical proposals
                 rely on a set of weak encryption schemes that have been
                 shown to leak sensitive data. In this paper, we propose
                 Arx, a practical and functionally rich database system
                 that encrypts the data only with semantically secure
                 encryption schemes. We show that Arx supports real
                 applications such as ShareLaTeX with a modest
                 performance overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Gao:2019:EKG,
  author =       "Junyang Gao and Xian Li and Yifan Ethan Xu and
                 Bunyamin Sisman and Xin Luna Dong and Jun Yang",
  title =        "Efficient knowledge graph accuracy evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1679--1691",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342642",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Estimation of the accuracy of a large-scale knowledge
                 graph (KG) often requires humans to annotate samples
                 from the graph. How to obtain statistically meaningful
                 estimates for accuracy evaluation while keeping human
                 annotation costs low is a problem critical to the
                 development cycle of a KG and its practical
                 applications. Surprisingly, this challenging problem
                 has largely been ignored in prior research. To address
                 the problem, this paper proposes an efficient sampling
                 and evaluation framework, which aims to provide quality
                 accuracy evaluation with strong statistical guarantee
                 while minimizing human efforts. Motivated by the
                 properties of the annotation cost function observed in
                 practice, we propose the use of cluster sampling to
                 reduce the overall cost. We further apply weighted and
                 two-stage sampling as well as stratification for better
                 sampling designs. We also extend our framework to
                 enable efficient incremental evaluation on evolving KG,
                 introducing two solutions based on stratified sampling
                 and a weighted variant of reservoir sampling. Extensive
                 experiments on real-world datasets demonstrate the
                 effectiveness and efficiency of our proposed solution.
                 Compared to baseline approaches, our best solutions can
                 provide up to 60\% cost reduction on static KG
                 evaluation and up to 80\% cost reduction on evolving KG
                 evaluation, without loss of evaluation quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Mhedhbi:2019:OSQ,
  author =       "Amine Mhedhbi and Semih Salihoglu",
  title =        "Optimizing subgraph queries by combining binary and
                 worst-case optimal joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1692--1704",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342643",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of optimizing subgraph queries
                 using the new worst-case optimal join plans. Worst-case
                 optimal plans evaluate queries by matching one query
                 vertex at a time using multi-way intersections. The
                 core problem in optimizing worst-case optimal plans is
                 to pick an ordering of the query vertices to match. We
                 design a cost-based optimizer that (i) picks efficient
                 query vertex orderings for worst-case optimal plans;
                 and (ii) generates hybrid plans that mix traditional
                 binary joins with worst-case optimal style multiway
                 intersections. Our cost metric combines the cost of
                 binary joins with a new cost metric called
                 intersection-cost. The plan space of our optimizer
                 contains plans that are not in the plan spaces based on
                 tree decompositions from prior work. In addition to our
                 optimizer, we describe an adaptive technique that
                 changes the orderings of the worst-case optimal
                 subplans during query execution. We demonstrate the
                 effectiveness of the plans our optimizer picks and the
                 effectiveness of the adaptive technique through
                 extensive experiments. Our optimizer is integrated into
                 the Graphflow DBMS.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Marcus:2019:NLQ,
  author =       "Ryan Marcus and Parimarjan Negi and Hongzi Mao and Chi
                 Zhang and Mohammad Alizadeh and Tim Kraska and Olga
                 Papaemmanouil and Nesime Tatbul",
  title =        "{Neo}: a learned query optimizer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1705--1718",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342644",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query optimization is one of the most challenging
                 problems in database systems. Despite the progress made
                 over the past decades, query optimizers remain
                 extremely complex components that require a great deal
                 of hand-tuning for specific workloads and datasets.
                 Motivated by this shortcoming and inspired by recent
                 advances in applying machine learning to data
                  management challenges, we introduce Neo (Neural
                  Optimizer), a novel learning-based query optimizer
                  that relies on deep neural networks to generate query
                  execution plans. Neo bootstraps its query optimization
                 model from existing optimizers and continues to learn
                 from incoming queries, building upon its successes and
                 learning from its failures. Furthermore, Neo naturally
                 adapts to underlying data patterns and is robust to
                 estimation errors. Experimental results demonstrate
                 that Neo, even when bootstrapped from a simple
                 optimizer like PostgreSQL, can learn a model that
                 offers similar performance to state-of-the-art
                 commercial optimizers, and in some cases even surpass
                 them.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2019:EAD,
  author =       "Yixiang Fang and Kaiqiang Yu and Reynold Cheng and
                 Laks V. S. Lakshmanan and Xuemin Lin",
  title =        "Efficient algorithms for densest subgraph discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1719--1732",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342645",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Densest subgraph discovery (DSD) is a fundamental
                 problem in graph mining. It has been studied for
                 decades, and is widely used in various areas, including
                 network science, biological analysis, and graph
                 databases. Given a graph G, DSD aims to find a subgraph
                 D of G with the highest density (e.g., the number of
                 edges over the number of vertices in D ). Because DSD
                 is difficult to solve, we propose a new solution
                 paradigm in this paper. Our main observation is that
                 the densest subgraph can be accurately found through a
                 k -core (a kind of dense subgraph of G ), with
                 theoretical guarantees. Based on this intuition, we
                 develop efficient exact and approximation solutions for
                 DSD. Moreover, our solutions are able to find the
                 densest subgraphs for a wide range of graph density
                  definitions, including clique-based and general
                 pattern-based density. We have performed extensive
                 experimental evaluation on both real and synthetic
                 datasets. Our results show that our algorithms are up
                 to four orders of magnitude faster than existing
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Marcus:2019:PSD,
  author =       "Ryan Marcus and Olga Papaemmanouil",
  title =        "Plan-structured deep neural network models for query
                 performance prediction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1733--1746",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342646",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query performance prediction, the task of predicting a
                 query's latency prior to execution, is a challenging
                 problem in database management systems. Existing
                 approaches rely on features and performance models
                 engineered by human experts, but often fail to capture
                 the complex interactions between query operators and
                 input relations, and generally do not adapt naturally
                 to workload characteristics and patterns in query
                 execution plans. In this paper, we argue that deep
                 learning can be applied to the query performance
                 prediction problem, and we introduce a novel neural
                 network architecture for the task: a plan-structured
                 neural network. Our neural network architecture matches
                 the structure of any optimizer-selected query execution
                  plan and predicts its latency with high accuracy, while
                 eliminating the need for human-crafted input features.
                 A number of optimizations are also proposed to reduce
                 training overhead without sacrificing effectiveness. We
                 evaluated our techniques on various workloads and we
                  demonstrate that our approach can outperform the
                 state-of-the-art in query performance prediction.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ren:2019:SSL,
  author =       "Kun Ren and Dennis Li and Daniel J. Abadi",
  title =        "{SLOG}: serializable, low-latency, geo-replicated
                 transactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1747--1761",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342647",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "For decades, applications deployed on a world-wide
                 scale have been forced to give up at least one of (1)
                 strict serializability (2) low latency writes (3) high
                 transactional throughput. In this paper we discuss
                 SLOG: a system that avoids this tradeoff for workloads
                 which contain physical region locality in data access.
                 SLOG achieves high-throughput, strictly serializable
                 ACID transactions at geo-replicated distance and scale
                 for all transactions submitted across the world, all
                 the while achieving low latency for transactions that
                 initiate from a location close to the home region for
                 data they access. Experiments find that SLOG can reduce
                 latency by more than an order of magnitude relative to
                 state-of-the-art strictly serializable geo-replicated
                 database systems such as Spanner and Calvin, while
                 maintaining high throughput under contention.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Paparrizos:2019:GET,
  author =       "John Paparrizos and Michael J. Franklin",
  title =        "{GRAIL}: efficient time-series representation
                 learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "11",
  pages =        "1762--1777",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3342263.3342648",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The analysis of time series is becoming increasingly
                 prevalent across scientific disciplines and industrial
                 applications. The effectiveness and the scalability of
                 time-series mining techniques critically depend on
                 design choices for three components responsible for (i)
                 representing; (ii) comparing; and (iii) indexing time
                 series. Unfortunately, these components have to date
                 been investigated and developed independently, often
                 resulting in mutually incompatible methods. The lack of
                 a unified approach has hindered progress towards fast
                 and accurate analytics over massive time-series
                 collections. To address this major drawback, we present
                 GRAIL, a generic framework to learn compact time-series
                 representations that preserve the properties of a
                 user-specified comparison function. Given the
                 comparison function, GRAIL (i) extracts landmark time
                 series using clustering; (ii) optimizes necessary
                 parameters; and (iii) exploits approximations for
                 kernel methods to construct representations in linear
                 time and space by expressing each time series as a
                 combination of the landmark time series. We extensively
                 evaluate GRAIL for querying, classification,
                 clustering, sampling, and visualization of time series.
                 For these tasks, methods leveraging GRAIL's
                 representations are significantly faster and at least
                 as accurate as state-of-the-art methods operating over
                 the raw time series. GRAIL shows promise as a new
                 primitive for highly accurate, yet scalable,
                 time-series analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Damasio:2019:GGA,
  author =       "Guilherme Damasio and Spencer Bryson and Vincent
                 Corvinelli and Parke Godfrey and Piotr Mierzejewski and
                 Jaroslaw Szlichta and Calisto Zuzarte",
  title =        "{GALO}: guided automated learning for
                 re-optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1778--1781",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352064",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query performance problem determination is usually
                 performed manually in consultation with experts through
                 the analysis of query plans. However, this is an
                 excessively time consuming, human error-prone, and
                 costly process. GALO is a novel system that automates
                 this process. The tool automatically learns recurring
                 problem patterns in query plans over workloads in an
                 offline learning phase to build a knowledge base of
                 plan rewrite remedies. GALO's knowledge base is built
                 on RDF and SPARQL, which is well-suited for
                 manipulating and querying over SQL query plans, which
                 are graphs themselves. It then uses the knowledge base
                 online to re-optimize queries queued for execution to
                 improve performance, often quite dramatically.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tian:2019:SGS,
  author =       "Yuanyuan Tian and Wen Sun and Sui Jun Tong and En
                 Liang Xu and Mir Hamid Pirahesh and Wei Zhao",
  title =        "Synergistic graph and {SQL} analytics inside {IBM
                 Db2}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1782--1785",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352065",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To meet the challenge of analyzing rapidly growing
                 graph and network data created by modern applications,
                 a large number of specialized graph databases have
                 emerged, such as Neo4j, JanusGraph, and Sqlg. At the
                 same time, RDBMSs and SQL continue to support
                 mission-critical business analytics. However, real-life
                 analytical applications seldom contain only one type of
                 analytics. They are often made of heterogeneous
                 workloads, including SQL, machine learning, graph, and
                 other analytics. In particular, SQL and graph analytics
                 are usually accompanied together in one analytical
                 workload. This means that graph and SQL analytics need
                 to be synergistic with each other. Unfortunately, most
                 existing graph databases are standalone and cannot
                 easily integrate with relational databases. In
                 addition, as a matter of fact, many graph data (data
                 about relationships between objects or people) are
                 already prevalent in relational databases, although
                 they are not explicitly stored as graphs. Performing
                 graph analytics on these relational graph data today
                 requires exporting large amount of data to the
                 specialized graph databases. A natural question arises:
                 can SQL and graph analytics be performed
                 synergistically in a same system? In this demo, we
                 present such a working system called IBM Db2 Graph. Db2
                 Graph is an in-DBMS graph query approach. It is
                 implemented as a layer inside an experimental IBM
                  Db2 (TM), and thus can support synergistic graph and SQL
                 analytics efficiently. Db2 Graph employs a graph
                 overlay approach to expose a graph view of the
                 relational data. This approach flexibly retrofits graph
                 queries to existing graph data stored in relational
                 tables. We use an example scenario on health insurance
                 claim analysis to demonstrate how Db2 Graph is used to
                 support synergistic graph and SQL analytics inside
                 Db2.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2019:CDC,
  author =       "Xiaoou Ding and Hongzhi Wang and Jiaxuan Su and Zijue
                 Li and Jianzhong Li and Hong Gao",
  title =        "{Cleanits}: a data cleaning system for industrial time
                 series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1786--1789",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352066",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The great amount of time series generated by machines
                 has enormous value in intelligent industry. Knowledge
                 can be discovered from high-quality time series, and
                 used for production optimization and anomaly detection
                 in industry. However, the original sensors data always
                 contain many errors. This requires a sophisticated
                 cleaning strategy and a well-designed system for
                 industrial data cleaning. Motivated by this, we
                 introduce Cleanits, a system for industrial time series
                 cleaning. It implements an integrated cleaning strategy
                 for detecting and repairing three kinds of errors in
                 industrial time series. We develop reliable data
                 cleaning algorithms, considering features of both
                 industrial time series and domain knowledge. We
                 demonstrate Cleanits with two real datasets from power
                 plants. The system detects and repairs multiple dirty
                 data precisely, and improves the quality of industrial
                 time series effectively. Cleanits has a friendly
                 interface for users, and result visualization along
                 with logs are available during each cleaning process.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2019:IIT,
  author =       "Yipeng Zhang and Zhifeng Bao and Songsong Mo and
                 Yuchen Li and Yanghao Zhou",
  title =        "{ITAA}: an intelligent trajectory-driven outdoor
                 advertising deployment assistant",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1790--1793",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352067",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we demonstrate an Intelligent
                 Trajectory-driven outdoor Advertising deployment
                 Assistant (ITAA), which assists users to find an
                 optimal strategy for outdoor advertising (ad)
                 deployment. The challenge is how to measure the
                  influence of ads on moving trajectories, and how to
                  optimize the placement of ads among billboards to
                  maximize this influence, which has been proven NP-hard.
                 Therefore, we develop a framework based on two
                 trajectory-driven influence models. ITAA is built upon
                 this framework with a user-friendly UI. It serves both
                 ad companies and their customers. We enhance the
                 interpretability to improve the user's understanding of
                 the influence of ads. The interactive function of ITAA
                 is made interpretable and easy to engage.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Qian:2019:SHL,
  author =       "Kun Qian and Lucian Popa and Prithviraj Sen",
  title =        "{SystemER}: a human-in-the-loop system for explainable
                 entity resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1794--1797",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352068",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Entity Resolution (ER) is the task of identifying
                 different representations of the same real-world
                 object. To achieve scalability and the desired level of
                 quality, the typical ER pipeline includes multiple
                 steps that may involve low-level coding and extensive
                 human labor. We present SystemER, a tool for learning
                 explainable ER models that reduces the human labor all
                 throughout the stages of the ER pipeline. SystemER
                 achieves explainability by learning rules that not only
                 perform a given ER task but are human-comprehensible;
                 this provides transparency into the learning process,
                 and further enables verification and customization of
                 the learned model by the domain experts. By leveraging
                 a human in the loop and active learning, SystemER also
                 ensures that a small number of labeled examples is
                 sufficient to learn high-quality ER models. SystemER is
                 a full-fledged tool that includes an easy to use
                 interface, support for both flat files and
                 semi-structured data, and scale-out capabilities by
                 distributing computation via Apache Spark.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huynh:2019:BEF,
  author =       "Viet-Phi Huynh and Paolo Papotti",
  title =        "{Buckle}: evaluating fact checking algorithms built on
                 knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1798--1801",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352069",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Fact checking is the task of determining if a given
                 claim holds. Several algorithms have been developed to
                 check facts with reference information in the form of
                 knowledge bases. We demonstrate BUCKLE, an open-source
                 benchmark for comparing and evaluating fact checking
                 algorithms in a level playing field across a range of
                 scenarios. The demo is centered around three main
                 lessons. To start, we show how, by changing the
                 properties of the training and test facts, it is
                 possible to influence significantly the performance of
                 the algorithms. We then show the role of the reference
                 data. Finally, we discuss the performance for
                 algorithms designed on different principles and
                 assumptions, as well as approaches that address the
                 link prediction task in knowledge bases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: AIQL, a domain-specific query system for
%%% investigating APT attack behaviors over system monitoring data.
@Article{Gao:2019:QSE,
  author =       "Peng Gao and Xusheng Xiao and Zhichun Li and Kangkook
                 Jee and Fengyuan Xu and Sanjeev R. Kulkarni and Prateek
                 Mittal",
  title =        "A query system for efficiently investigating complex
                 attack behaviors for enterprise security",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1802--1805",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352070",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The need for countering Advanced Persistent Threat
                 (APT) attacks has led to the solutions that
                 ubiquitously monitor system activities in each
                 enterprise host, and perform timely attack
                 investigation over the monitoring data for uncovering
                 the attack sequence. However, existing general-purpose
                 query systems lack explicit language constructs for
                 expressing key properties of major attack behaviors,
                 and their semantics-agnostic design often produces
                 inefficient execution plans for queries. To address
                 these limitations, we build Aiql, a novel query system
                 that is designed with novel types of domain-specific
                 optimizations to enable efficient attack investigation.
                 Aiql provides (1) a domain-specific data model and
                 storage for storing the massive system monitoring data,
                 (2) a domain-specific query language, Attack
                 Investigation Query Language (Aiql) that integrates
                 critical primitives for expressing major attack
                 behaviors, and (3) an optimized query engine based on
                 the characteristics of the data and the semantics of
                 the query to efficiently schedule the execution. We
                 have deployed Aiql in NEC Labs America comprising 150
                 hosts. In our demo, we aim to show the complete usage
                 scenario of Aiql by (1) performing an APT attack in a
                 controlled environment, and (2) using Aiql to
                 investigate such attack by querying the collected
                 system monitoring data that contains the attack traces.
                 The audience will have the option to perform the APT
                 attack themselves under our guidance, and interact with
                 the system and investigate the attack via issuing
                 queries and checking the query results through our web
                 UI.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: CAPE, a system that explains surprising
%%% aggregation outliers via counterbalancing regression patterns.
@Article{Miao:2019:CEO,
  author =       "Zhengjie Miao and Qitian Zeng and Chenjie Li and Boris
                 Glavic and Oliver Kennedy and Sudeepa Roy",
  title =        "{CAPE}: explaining outliers by counterbalancing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1806--1809",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352071",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration we showcase Cape, a system that
                 explains surprising aggregation outcomes. In contrast
                 to previous work, which relies exclusively on
                 provenance, Cape explains outliers in aggregation
                 queries through related outliers in the opposite
                 direction that provide counterbalance. The foundation
                 of our approach are aggregate regression patterns
                 (ARPs) that describe coarse-grained trends in the data.
                 We define outliers as deviations from such patterns and
                 present an efficient algorithm to find counterbalances
                 explaining outliers. In the demonstration, the audience
                 can run aggregation queries over real world datasets,
                 identify outliers of interest in the result of such
                 queries, and browse the patterns and explanations
                 returned by Cape.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: Froid, automatic inlining of scalar UDFs into
%%% SQL queries (shipped as ``Scalar UDF Inlining'' in Microsoft SQL
%%% Server 2019).
@Article{Ramachandra:2019:BAI,
  author =       "Karthik Ramachandra and Kwanghyun Park",
  title =        "{BlackMagic}: automatic inlining of scalar {UDFs} into
                 {SQL} queries with {Froid}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1810--1813",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352072",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Relational DBMSs allow users to extend the standard
                 declarative SQL language surface using User Defined
                 Functions (UDFs) that implement custom behavior. While
                 UDFs offer many advantages, it is well-known amongst
                 practitioners that they can cause severe degradation in
                 query performance. This degradation is due to the fact
                 that state-of-the-art query optimizers treat UDFs as
                 black boxes and do not reason about them during
                 optimization. We demonstrate Froid, a framework for
                 optimizing UDFs by opening up this black box and
                 exposing its underlying operations to the query
                 optimizer. It achieves this by systematically
                 translating the entire body of an imperative
                 multi-statement UDF into a single relational algebraic
                 expression. Thereby, any query invoking this UDF is
                 transformed into a query with a nested sub-query that
                 is semantically equivalent to the UDF. We then leverage
                 existing sub-query optimization techniques and thereby
                 get efficient, set-oriented, parallel query plans as
                 opposed to inefficient, iterative, serial execution of
                 UDFs. We demonstrate the benefits of Froid including
                 performance gains of up to multiple orders of magnitude
                 on real workloads. Froid is available as a feature of
                 Microsoft SQL Server 2019 called 'Scalar UDF
                 Inlining'.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: ProgressiveDB, a middleware adding progressive
%%% (continuous, approximate) query processing to standard SQL databases.
@Article{Berg:2019:PPD,
  author =       "Lukas Berg and Tobias Ziegler and Carsten Binnig and
                 Uwe R{\"o}hm",
  title =        "{ProgressiveDB}: progressive data analytics as a
                 middleware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1814--1817",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352073",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "ProgressiveDB transforms any standard SQL database
                 into a progressive database capable of continuous,
                 approximate query processing. It introduces a few small
                 extensions to the SQL query language that allow clients
                 to express progressive analytical queries. These
                 extensions are processed in the ProgressiveDB
                 middleware that sits between a database application and
                 the underlying database providing interactive query
                 processing as well as query steering capabilities to
                 the user. In our demo, we show how this system allows a
                 database application with a graphical user interface to
                 interact with different backends, while providing the
                 user with immediate feedback during exploratory data
                 exploration of an on-time flight database.
                 ProgressiveDB also supports efficient query steering by
                 providing a new technique, called progressive views,
                 which allows the intermediate results of one
                 progressive query to be shared and reused by multiple
                 concurrent progressive queries with refined scope.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: doppioDB 2.0, hardware (FPGA) techniques for
%%% integrating machine learning into a database engine.
@Article{Kara:2019:DHT,
  author =       "Kaan Kara and Zeke Wang and Ce Zhang and Gustavo
                 Alonso",
  title =        "{doppioDB 2.0}: hardware techniques for improved
                 integration of machine learning into databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1818--1821",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352074",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database engines are starting to incorporate machine
                 learning (ML) functionality as part of their
                 repertoire. Machine learning algorithms, however, have
                 very different characteristics than those of relational
                 operators. In this demonstration, we explore the
                 challenges that arise when integrating generalized
                 linear models into a database engine and how to
                 incorporate hardware accelerators into the execution, a
                 tool now widely used for ML workloads. The demo
                 explores two complementary alternatives: (1) how to
                 train models directly on compressed/encrypted
                 column-stores using a specialized coordinate descent
                 engine, and (2) how to use a bitwise weaving index for
                 stochastic gradient descent on low precision input
                 data. We present these techniques as implemented in our
                 prototype database doppioDB 2.0 and show how the new
                 functionality can be used from SQL.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: COVIZ, an interactive system for visual
%%% formation and exploration of patient cohorts, backed by the QDS
%%% data structure.
@Article{Pahins:2019:CSV,
  author =       "Cicero A. L. Pahins and Behrooz Omidvar-Tehrani and
                 Sihem Amer-Yahia and Val{\'e}rie Siroux and Jean-Louis
                 Pepin and Jean-Christian Borel and Jo{\~a}o L. D.
                 Comba",
  title =        "{COVIZ}: a system for visual formation and exploration
                 of patient cohorts",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1822--1825",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352075",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate COVIZ, an interactive system to
                 visually form and explore patient cohorts. COVIZ
                 seamlessly integrates visual cohort formation and
                 exploration, making it a single destination for
                 hypothesis generation. COVIZ is easy to use by medical
                 experts and offers many features: (1) It provides the
                 ability to isolate patient demographics (e.g., their
                 age group and location), health markers (e.g., their
                 body mass index), and treatments (e.g., Ventilation for
                 respiratory problems), and hence facilitates cohort
                 formation; (2) It summarizes the evolution of
                 treatments of a cohort into health trajectories, and
                 lets medical experts explore those trajectories; (3) It
                 guides them in examining different facets of a cohort
                 and generating hypotheses for future analysis; (4)
                 Finally, it provides the ability to compare the
                 statistics and health trajectories of multiple cohorts
                 at once. COVIZ relies on QDS, a novel data structure
                 that encodes and indexes various data distributions to
                 enable their efficient retrieval. Additionally, COVIZ
                 visualizes air quality data in the regions where
                 patients live to help with data interpretations. We
                 demonstrate two key scenarios, ecological scenario and
                 case cross-over scenario. A video demonstration of
                 COVIZ is accessible via http://bit.ly/video-coviz.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: PRIMAT, a toolbox for privacy-preserving
%%% record linkage (PPRL) workflows and method evaluation.
@Article{Franke:2019:PTF,
  author =       "Martin Franke and Ziad Sehili and Erhard Rahm",
  title =        "{PRIMAT}: a toolbox for fast privacy-preserving
                 matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1826--1829",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352076",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Privacy-preserving record linkage (PPRL) is
                 increasingly demanded in real-world applications, e.g.,
                 in the health-care domain, to combine person-related
                 data for data analysis while preserving the privacy of
                 individuals. However, the adoption of PPRL is hampered
                 by the absence of easy-to-use and powerful PPRL tools
                 covering the entire PPRL process. We therefore
                 demonstrate Primat, a flexible and scalable tool that
                 enables the definition and application of tailored PPRL
                 workflows as well as the comparative evaluation of
                 different PPRL methods. We introduce the main
                 requirements for PPRL tools and discuss previous tool
                 efforts that do not fully meet the requirements and
                 have not been applied in practice. By contrast, Primat
                 covers the whole PPRL life-cycle and improves
                 applicability by providing various components for data
                 owners and the central linkage to be executed by a
                 trusted linkage unit.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: NashDB, economics-based data fragmentation,
%%% replica allocation, and cluster provisioning for elastic databases.
@Article{Marcus:2019:NFR,
  author =       "Ryan Marcus and Chi Zhang and Shuai Yu and Geoffrey
                 Kao and Olga Papaemmanouil",
  title =        "{NashDB}: fragmentation, replication, and provisioning
                 using economic methods",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1830--1833",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352077",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern elastic computing systems allow applications to
                 scale up and down automatically, increasing capacity
                 for workload spikes and ensuring cost savings during
                 lulls in activity. Adapting database management systems
                 to work on top of such elastic infrastructure is not a
                 trivial task, and requires a deep understanding of the
                 sophisticated interplay between data fragmentation,
                 replica allocation, and cluster provisioning. This
                 demonstration showcases NashDB, an end-to-end method
                 for addressing these concerns in an automatic way.
                 NashDB relies on economic models to maximize query
                 performance while staying within a user's budget. This
                 demonstration will (1) allow audience members to see
                 how NashDB handles shifting workloads in an adaptive
                 way, and (2) allow audience members to test NashDB
                 themselves by constructing synthetic workloads and
                 seeing how NashDB adapts a cluster to them in real
                 time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: Flash, a framework for scalable spatial data
%%% analysis using Markov Logic Networks.
%%% The system name ``Flash'' in the title is brace-protected so that
%%% sentence-casing bibliography styles do not downcase it, matching
%%% the treatment of sibling entries ({Buckle}, {CAPE}, {VISE}, ...).
@Article{Sabek:2019:FAS,
  author =       "Ibrahim Sabek and Mashaal Musleh and Mohamed F.
                 Mokbel",
  title =        "{Flash} in action: scalable spatial data analysis using
                 {Markov} logic networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1834--1837",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352078",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The current explosion in spatial data raises the need
                 for efficient spatial analysis tools to extract useful
                 information from such data. However, existing tools are
                 neither generic nor scalable when dealing with big
                 spatial data. This demo presents Flash; a framework for
                 generic and scalable spatial data analysis, with a
                 special focus on spatial probabilistic graphical
                 modelling (SPGM). Flash exploits Markov Logic Networks
                 (MLN) to express SPGM as a set of declarative logical
                 rules. In addition, it provides spatial variations of
                 the scalable RDBMS-based learning and inference
                 techniques of MLN to efficiently perform SPGM
                 predictions. To show Flash effectiveness, we
                 demonstrate three applications that use Flash in their
                 SPGM: (1) Bird monitoring, (2) Safety analysis, and (3)
                 Land use change tracking.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: FPGA-based (``bionic'') distributed storage
%%% with in-line deduplication for Apache Parquet files.
@Article{Kuhring:2019:CBO,
  author =       "Lucas Kuhring and Zsolt Istv{\'a}n",
  title =        "{I} can't believe it's not (only) software!: bionic
                 distributed storage for {Parquet} files",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1838--1841",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352079",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "There is a steady increase in the size of data stored
                 and processed as part of data science applications,
                 leading to bottlenecks and inefficiencies at various
                 layers of the stack. One way of reducing such
                 bottlenecks and increasing energy efficiency is by
                 tailoring the underlying distributed storage solution
                 to the application domain, using resources more
                 efficiently. We explore this idea in the context of a
                 popular column-oriented storage format used in big data
                 workloads, namely Apache Parquet. Our prototype uses an
                 FPGA-based storage node that offers high bandwidth data
                 deduplication and a companion software library that
                 exposes an API for Parquet file access. This way the
                 storage node remains general purpose and could be
                 shared by applications from different domains, while,
                 at the same time, benefiting from deduplication well
                 suited to Apache Parquet files and from selective reads
                 of columns in the file. In this demonstration we show,
                 on the one hand, that by relying on the FPGA's dataflow
                 processing model, it is possible to implement in-line
                 deduplication without increasing latencies
                 significantly or reducing throughput. On the other
                 hand, we highlight the benefits of implementing the
                 application-specific aspects in a software library
                 instead of FPGA circuits and how this enables, for
                 instance, regular data science frameworks running in
                 Python to access the data on the storage node and to
                 offload filtering operations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: VISE, a vehicle image search engine over
%%% low-resolution traffic-camera images.
@Article{Choi:2019:VVI,
  author =       "Hyewon Choi and Erkang Zhu and Arsala Bangash and
                 Ren{\'e}e J. Miller",
  title =        "{VISE}: vehicle image search engine with traffic
                 camera",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1842--1845",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352080",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present VISE, or Vehicle Image Search Engine, to
                 support the fast search of similar vehicles from
                 low-resolution traffic camera images. VISE can be used
                 to trace and locate vehicles for applications such as
                 police investigations when high-resolution footage is
                 not available. Our system consists of three components:
                 an interactive user-interface for querying and browsing
                 identified vehicles; a scalable search engine for fast
                 similarity search on millions of visual objects; and an
                 image processing pipeline that extracts feature vectors
                 of objects from video frames. We use transfer learning
                 technique to integrate state-of-the-art Convolutional
                 Neural Networks with two different refinement methods
                 to achieve high retrieval accuracy. We also use an
                 efficient high-dimensional nearest neighbor search
                 index to enable fast retrieval speed. In the demo, our
                 system will offer users an interactive experience
                 exploring a large database of traffic camera images
                 that is growing in real time at 200K frames per day.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: WiClean, a Wikipedia plug-in that fixes
%%% interlink errors using revision-history update patterns.
@Article{Goldberg:2019:WSF,
  author =       "Stephan Goldberg and Tova Milo and Slava Novgorodov
                 and Kathy Razmadze",
  title =        "{WiClean}: a system for fixing {Wikipedia} interlinks
                 using revision history patterns",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1846--1849",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352081",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present WiClean, a Wikipedia plug-in that supports
                 the identification and cleaning of certain types of
                 errors in Wikipedia interlinks. The system mines update
                 patterns in Wikipedia revision logs, identifies the
                 common time frames in which they occur, and employs
                 them to signal incomplete/inconsistent updates and
                 suggests corrections. We demonstrate the effectiveness
                 of WiClean in identifying actual errors in a variety of
                 Wikipedia entity types, interactively employing the
                 VLDB'19 audience as editors to correct the identified
                 errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: SparkCruise, automatic selection and
%%% materialization of common computations for reuse in Spark.
@Article{Roy:2019:SHC,
  author =       "Abhishek Roy and Alekh Jindal and Hiren Patel and
                 Ashit Gosalia and Subru Krishnan and Carlo Curino",
  title =        "{SparkCruise}: handsfree computation reuse in
                 {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1850--1853",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352082",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Interactive data analytics is often inundated with
                 common computations across multiple queries. These
                 redundancies result in poor query performance and
                 higher overall cost for the interactive query sessions.
                 Obviously, reusing these common computations could lead
                 to cost savings. However, it is difficult for the users
                 to manually detect and reuse the common computations in
                 their fast moving interactive sessions. In the paper,
                 we propose to demonstrate SparkCruise, a computation
                 reuse system that automatically selects the most useful
                 common computations to materialize based on the past
                 query workload. SparkCruise materializes these
                 computations as part of query processing, so the users
                 can continue with their query processing just as before
                 and computation reuse is automatically applied in the
                 background --- all without any modifications to the
                 Spark code. We will invite the audience to play with
                 several scenarios, such as workload redundancy insights
                 and pay-as-you-go materialization, highlighting the
                 utility of SparkCruise.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% Demonstration paper: in-database distributed machine learning
%%% inside the Teradata SQL Engine, driven from Python/Jupyter.
@Article{Sandha:2019:DDM,
  author =       "Sandeep Singh Sandha and Wellington Cabrera and
                 Mohammed Al-Kateb and Sanjay Nair and Mani Srivastava",
  title =        "In-database distributed machine learning:
                 demonstration using {Teradata SQL} engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1854--1857",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352083",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine learning has enabled many interesting
                 applications and is extensively being used in big data
                 systems. The popular approach --- training machine
                 learning models in frameworks like Tensorflow, Pytorch
                 and Keras --- requires movement of data from database
                 engines to analytical engines, which adds an excessive
                 overhead on data scientists and becomes a performance
                 bottleneck for model training. In this demonstration,
                 we give a practical exhibition of a solution for the
                 enablement of distributed machine learning natively
                 inside database engines. During the demo, the audience
                 will interactively use Python APIs in Jupyter Notebooks
                 to train multiple linear regression models on synthetic
                 regression datasets and neural network models on vision
                 and sensory datasets directly inside Teradata SQL
                 Engine.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2019:SLS,
  author =       "Zhao Li and Xia Chen and Xuming Pan and Pengcheng Zou
                 and Yuchen Li and Guoxian Yu",
  title =        "{SHOAL}: large-scale hierarchical taxonomy via
                 graph-based query coalition in e-commerce",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1858--1861",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352084",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "E-commerce taxonomy plays an essential role in online
                 retail business. Existing taxonomy of e-commerce
                 platforms organizes items into an ontology structure.
                 However, the ontology-driven approach is subject to
                 costly manual maintenance and often does not capture
                 user's search intention, particularly when user
                 searches by her personalized needs rather than a
                 universal definition of the items. Observing that
                 search queries can effectively express user's
                 intention, we present a novel large-Scale Hierarchical
                  taxOnomy via grAph based query coaLition (SHOAL) to
                 bridge the gap between item taxonomy and user search
                 intention. SHOAL organizes hundreds of millions of
                 items into a hierarchical topic structure. Each topic
                 that consists of a cluster of items denotes a
                 conceptual shopping scenario, and is tagged with
                 easy-to-interpret descriptions extracted from search
                 queries. Furthermore, SHOAL establishes correlation
                 between categories of ontology-driven taxonomy, and
                 offers opportunities for explainable recommendation.
                 The feedback from domain experts shows that SHOAL
                 achieves a precision of 98\% in terms of placing items
                 into the right topics, and the result of an online A/B
                 test demonstrates that SHOAL boosts the Click Through
                 Rate (CTR) by 5\%. SHOAL has been deployed in Alibaba
                 and supports millions of searches for online shopping
                 per day.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Xu:2019:DMD,
  author =       "Min Xu and Tianhao Wang and Bolin Ding and Jingren
                 Zhou and Cheng Hong and Zhicong Huang",
  title =        "{DPSAaS}: multi-dimensional data sharing and analytics
                 as services under local differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1862--1865",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352085",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Differential privacy has emerged as the de facto
                 standard for privacy definitions, and been used by,
                 e.g., Apple, Google, Uber, and Microsoft, to collect
                 sensitive information about users and to build
                 privacy-preserving analytics engines. However, most of
                 such advanced privacy-protection techniques are not
                 accessible to mid-size companies and app developers in
                 the cloud. We demonstrate a lightweight middleware
                 DPSAaS, which provides differentially private
                 data-sharing-and-analytics functionality as cloud
                 services. We focus on multi-dimensional analytical
                 (MDA) queries under local differential privacy (LDP) in
                 this demo. MDA queries against a fact table have
                 predicates on (categorical or ordinal) dimensions and
                 aggregate one or more measures. In the absence of a
                 trusted agent, sensitive dimensions and measures are
                 encoded in a privacy-preserving way locally using our
                 LDP data sharing service, before being sent to the data
                 collector. The data collector estimates the answers to
                 MDA queries from the encoded data, using our data
                 analytics service. We will highlight the design
                  decisions of DPSAaS and twists made to LDP algorithms
                 to fit the design, in order to smoothly connect DPSAaS
                 to the data processing platform and analytics engines,
                 and to facilitate efficient large-scale processing.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2019:PPS,
  author =       "Yang Cao and Yonghui Xiao and Li Xiong and Liquan Bai
                 and Masatoshi Yoshikawa",
  title =        "{PriSTE}: protecting spatiotemporal event privacy in
                 continuous location-based services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1866--1869",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352086",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Location privacy-preserving mechanisms (LPPMs) have
                 been extensively studied for protecting a user's
                 location in location-based services. However, when
                 user's perturbed locations are released continuously,
                 existing LPPMs may not protect users' sensitive
                 spatiotemporal event, such as ``visited hospital in the
                 last week'' or ``regularly commuting between location 1
                 and location 2 every morning and afternoon'' (it is
                 easy to infer that locations 1 and 2 may be home and
                 office). In this demonstration, we demonstrate PriSTE
                 for protecting spatiotemporal event privacy in
                 continuous location release. First, to raise users'
                 awareness of such a new privacy goal, we design an
                 interactive tool to demonstrate how accurate an
                 adversary could infer a secret spatiotemporal event
                 from a sequence of locations or even LPPM-protected
                 locations. The attendees can find that some
                 spatiotemporal events are quite risky and even these
                 state-of-the-art LPPMs do not always protect
                 spatiotemporal event privacy. Second, we demonstrate
                 how a user can use PriSTE to automatically or manually
                 convert an LPPM for location privacy into one
                 protecting spatiotemporal event privacy in continuous
                 location-based services. Finally, we visualize the
                 trade-off between privacy and utility so that users can
                 choose appropriate privacy parameters in different
                 application scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Deutch:2019:DOS,
  author =       "Daniel Deutch and Evgeny Marants and Yuval
                 Moskovitch",
  title =        "{Datalignment}: ontology schema alignment through
                 datalog containment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1870--1873",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352087",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We focus on the problem of aligning ontology
                 relations, namely finding relation names that
                 correspond to the same or related concepts. Such
                 alignment is a prerequisite to the integration of the
                 multiple available Knowledge Bases many of which
                 include similar concepts, differently termed. We
                 propose a novel approach for this problem, by
                 leveraging association rules --- originally mined in
                 order to enrich the ontological content. Here, we treat
                 the rules as Datalog programs and look for
                 bounded-depth sub-programs that are contained in (or
                 equivalent to) each other. Heads of such programs
                 intuitively correspond to related concepts, and we
                 propose them as candidates for alignment. The candidate
                 alignments require further verification by experts; to
                 this end we accompany each aligned pair with
                 explanations based on the provenance of each relation
                 according to its sub-program. We have implemented our
                 novel solution in a system called Datalignment. We
                 propose to demonstrate Datalignment, presenting the
                 aligned pairs that it finds, and the computed
                 explanations, in context of real-life Knowledge
                 Bases.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ge:2019:IIH,
  author =       "Congcong Ge and Yunjun Gao and Xiaoye Miao and Lu Chen
                 and Christian S. Jensen and Ziyuan Zhu",
  title =        "{IHCS}: an integrated hybrid cleaning system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1874--1877",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352088",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data cleaning is a prerequisite to subsequent data
                  analysis, and is known to often be time-consuming and
                 labor-intensive. We present IHCS, a hybrid data
                 cleaning system that integrates error detection and
                 repair to contend effectively with multiple error
                 types. In a preprocessing step that precedes the data
                 cleaning, IHCS formats an input dataset to be cleaned,
                 and transforms applicable data quality rules into a
                 unified format. Then, an MLN index structure is formed
                 according to the unified rules, enabling IHCS to handle
                 multiple error types simultaneously. During the
                 cleaning, IHCS first tackles abnormalities through an
                 abnormal group process, and then, it generates multiple
                 data versions based on the MLN index. Finally, IHCS
                 eliminates conflicting values across the multiple
                 versions, and derives the final unified clean data. A
                 visual interface enables cleaning process monitoring
                 and cleaning result analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Costa:2019:CGB,
  author =       "Constantinos Costa and Xiaoyu Ge and Panos K.
                 Chrysanthis",
  title =        "{CAPRIO}: graph-based integration of indoor and
                 outdoor data for path discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1878--1881",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352089",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recently, navigation and localization systems have
                 emerged to support queries like the shortest distance
                 in either indoor or outdoor with additional
                 constraints. These systems, however, neither combine
                 the indoor and outdoor information nor consider the
                 external natural conditions like the weather that one
                 may face across an outdoor path. In this demonstration
                 paper we present CAPRIO, which proposes and implements
                 a novel graph representation that integrates indoor and
                 outdoor information to discover paths that personalize
                  outdoor exposure while minimizing the overall path
                 length. We also demonstrate how unifying the graph
                 algorithms for indoor and outdoor navigation enables
                 significant optimizations that would not be possible
                 otherwise.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wu:2019:HAS,
  author =       "Yingjun Wu and Jia Yu and Yuanyuan Tian and Richard
                 Sidle and Ronald Barber",
  title =        "{HERMIT} in action: succinct secondary indexing
                 mechanism via correlation exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1882--1885",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352090",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database administrators construct secondary indexes on
                 data tables to accelerate query processing in
                 relational database management systems (RDBMSs). These
                 indexes are built on top of the most frequently queried
                 columns according to the data statistics.
                 Unfortunately, maintaining multiple secondary indexes
                 in the same database can be extremely space consuming,
                 causing significant performance degradation due to the
                 potential exhaustion of memory space. However, we find
                 that there indeed exist many opportunities to save
                 storage space by exploiting column correlations. We
                 recently introduced Hermit, a succinct secondary
                 indexing mechanism for modern RDBMSs. Hermit
                 judiciously leverages the rich soft functional
                 dependencies hidden among columns to prune out
                  redundant structures for indexed key access. Instead of
                 building a complete index that stores every single
                 entry in the key columns, Hermit navigates any incoming
                 key access queries to an existing index built on the
                 correlated columns. This is achieved through the Tiered
                 Regression Search Tree (TRS-Tree), a succinct,
                 ML-enhanced data structure that performs fast curve
                 fitting to adaptively and dynamically capture both
                 column correlations and outliers. In this
                 demonstration, we showcase Hermit's appealing
                  characteristics. We not only demonstrate that Hermit
                 can significantly reduce space consumption with limited
                 performance overhead in terms of query response time
                 and index maintenance time, but also explain in detail
                 the rationale behind Hermit's high efficiency using
                 interactive online query processing examples.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Loudet:2019:DSH,
  author =       "Julien Loudet and Iulian Sandu-Popa and Luc Bouganim",
  title =        "{DISPERS}: securing highly distributed queries on
                 personal data management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1886--1889",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352091",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Personal Data Management Systems (PDMS) advance at a
                 rapid pace allowing us to integrate all our personal
                 data in a single place and use it for our benefit and
                 for the benefit of the community. This leads to a
                 significant paradigm shift since personal data become
                 massively distributed and opens an important question:
                 how to query this massively distributed data in an
                 efficient, pertinent and privacy preserving way? This
                 demonstration proposes a fully-distributed PDMS called
                 DISPERS, built on top of SEP2P, allowing users to
                 securely and efficiently share and query their personal
                 data. The demonstration platform graphically
                 illustrates the query execution in details, showing
                 that DISPERS leads to maximal system security with low
                 and scalable overhead. Attendees are welcome to
                 challenge the security provided by DISPERS using the
                 proposed hacking tools.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Akhter:2019:SFS,
  author =       "Adil Akhter and Marios Fragkoulis and Asterios
                 Katsifodimos",
  title =        "Stateful functions as a service in action",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1890--1893",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352092",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In the serverless model, users upload application code
                 to a cloud platform and the cloud provider undertakes
                 the deployment, execution and scaling of the
                 application, relieving users from all operational
                 aspects. Although very popular, current serverless
                 offerings offer poor support for the management of
                 local application state, the main reason being that
                 managing state and keeping it consistent at large scale
                 is very challenging. As a result, the serverless model
                 is inadequate for executing stateful, latency-sensitive
                 applications. In this paper we present a high-level
                 programming model for developing stateful functions and
                 deploying them in the cloud. Our programming model
                 allows functions to retain state as well as call other
                 functions. In order to deploy stateful functions in a
                 cloud infrastructure, we translate functions and their
                 data exchanges into a stateful dataflow graph. With
                 this paper we aim at demonstrating that using a
                 modified version of an open-source dataflow engine as a
                 runtime for stateful functions, we can deploy scalable
                 and stateful services in the cloud with surprisingly
                 low latency and high throughput.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ordookhanians:2019:DKO,
  author =       "Allen Ordookhanians and Xin Li and Supun Nakandala and
                 Arun Kumar",
  title =        "Demonstration of {Krypton}: optimized {CNN} inference
                 for occlusion-based deep {CNN} explanations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1894--1897",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352093",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration, we present Krypton, a system
                 for accelerating occlusion-based deep convolution
                 neural network (CNN) explanation workloads. Driven by
                 the success of CNNs in image understanding tasks, there
                 is growing adoption of CNNs in various domains,
                 including high stakes applications such as radiology.
                 However, users of such applications often seek an
                 ``explanation'' for why a CNN predicted a certain
                 label. One of the most widely used approaches for
                 explaining CNN predictions is the occlusion-based
                 explanation (OBE) method. This approach is
                 computationally expensive due to the large number of
                 re-inference requests produced. Krypton reduces the
                 runtime of OBE by up to 35x by enabling incremental and
                 approximate inference optimizations that are inspired
                 by classical database query optimization techniques. We
                 allow the audience to interactively diagnose CNN
                 predictions from several use cases, including radiology
                 and natural images. A short video of our demonstration
                 can be found here: https://youtu.be/1OWddbd4n6Y",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Miao:2019:LVE,
  author =       "Zhengjie Miao and Andrew Lee and Sudeepa Roy",
  title =        "{LensXPlain}: visualizing and explaining contributing
                 subsets for aggregate query answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1898--1901",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352094",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demonstration, we will present LensXPlain, an
                 interactive system to help users understand answers of
                 aggregate queries by providing meaningful explanations.
                 Given a SQL group-by query and a question from a user
                 ``why output o is high/low'', or ``why output o$_1$
                  is higher/lower than o$_2$'', LensXPlain helps users
                 explore the results and find subsets of tuples captured
                 by predicates that contributed the most toward such
                 observations. The contributions are measured either by
                 intervention (if the contributing tuples are removed,
                 the values or the ratios in the user question change in
                 the opposite direction), or by aggravation (if the
                 query is restricted to the contributing tuples, the
                 observations change more in the same direction).
                 LensXPlain uses ensemble learning for recommending
                 useful attributes in explanations, and employs a suite
                 of optimizations to enable explanation generation and
                 refinement at an interactive speed. In the
                 demonstration, the audience can run aggregation queries
                 over real world datasets, browse the answers using a
                 graphical user interface, ask questions on
                 unexpected/interesting query results with simple
                 visualizations, and explore and refine explanations
                 returned by LensXPlain.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2019:JDL,
  author =       "Yi Zhang and Zachary G. Ives",
  title =        "{Juneau}: data lake management for {Jupyter}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1902--1905",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352095",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In collaborative settings such as multi-investigator
                 laboratories, data scientists need improved tools to
                 manage not their data records but rather their data
                 sets and data products, to facilitate both provenance
                 tracking and data (and code) reuse within their data
                 lakes and file systems. We demonstrate the Juneau
                 System, which extends computational notebook software
                 (Jupyter Notebook) as an instrumentation and data
                 management point for overseeing and facilitating
                 improved dataset usage, through capabilities for
                 indexing, searching, and recommending ``complementary''
                 data sources, previously extracted machine learning
                 features, and additional training data. This
                 demonstration focuses on how we help the user find
                 related datasets via search.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hasani:2019:AEA,
  author =       "Sona Hasani and Faezeh Ghaderi and Shohedul Hasan and
                 Saravanan Thirumuruganathan and Abolfazl Asudeh and
                 Nick Koudas and Gautam Das",
  title =        "{ApproxML}: efficient approximate ad-hoc {ML} models
                 through materialization and reuse",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1906--1909",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352096",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Machine learning (ML) has gained a pivotal role in
                 answering complex predictive analytic queries. Model
                 building for large scale datasets is one of the time
                 consuming parts of the data science pipeline. Often
                 data scientists are willing to sacrifice some accuracy
                 in order to speed up this process during the
                 exploratory phase. In this paper, we propose to
                 demonstrate ApproxML, a system that efficiently
                 constructs approximate ML models for new queries from
                 previously constructed ML models using the concepts of
                 model materialization and reuse. ApproxML supports a
                 variety of ML models such as generalized linear models
                 for supervised learning, and K-means and Gaussian
                 Mixture model for unsupervised learning.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Essertel:2019:FAL,
  author =       "Gr{\'e}gory Essertel and Ruby Y. Tahboub and Fei Wang
                 and James Decker and Tiark Rompf",
  title =        "{Flare \& Lantern}: efficiently swapping horses
                 midstream",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1910--1913",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352097",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Running machine learning (ML) workloads at scale is as
                 much a data management problem as a model engineering
                 problem. Big performance challenges exist when data
                 management systems invoke ML classifiers as
                 user-defined functions (UDFs) or when stand-alone ML
                 frameworks interact with data stores for data loading
                 and pre-processing (ETL). In particular, UDFs can be
                 precompiled or simply a black box for the data
                 management system and the data layout may be completely
                 different from the native layout, thus adding overheads
                 at the boundaries. In this demo, we will show how
                 bottlenecks between existing systems can be eliminated
                 when their engines are designed around runtime
                 compilation and native code generation, which is the
                 case for many state-of-the-art relational engines as
                 well as ML frameworks. We demonstrate an integration of
                 Flare (an accelerator for Spark SQL), and Lantern (an
                 accelerator for TensorFlow and PyTorch) that results in
                 a highly optimized end-to-end compiled data path,
                 switching between SQL and ML processing with negligible
                 overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Martins:2019:TES,
  author =       "Ruben Martins and Jia Chen and Yanju Chen and Yu Feng
                 and Isil Dillig",
  title =        "{Trinity}: an extensible synthesis framework for data
                 science",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1914--1917",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352098",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this demo paper, we introduce Trinity, a
                 general-purpose framework that can be used to quickly
                 build domain-specific program synthesizers for
                 automating many tedious tasks that arise in data
                 science. We illustrate how Trinity can be used by three
                 different users: First, we show how end-users can use
                 Trinity's built-in synthesizers to automate data
                 wrangling tasks. Second, we show how advanced users can
                 easily extend existing synthesizers to support
                 additional functionalities. Third, we show how
                 synthesis experts can change the underlying search
                 engine in Trinity. Overall, this paper is intended to
                 demonstrate how users can quickly use, modify, and
                 extend the Trinity framework with the goal of
                 automating many tasks that are considered to be the
                 ``janitor'' work of data science.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2019:PAA,
  author =       "Zhiqi Huang and Ryan McKenna and George Bissias and
                 Gerome Miklau and Michael Hay and Ashwin
                 Machanavajjhala",
  title =        "{PSynDB}: accurate and accessible private data
                 generation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1918--1921",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352099",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Across many application domains, trusted parties who
                 collect sensitive information need mechanisms to safely
                 disseminate data. A favored approach is to generate
                 synthetic data: a dataset similar to the original,
                 hopefully retaining its statistical features, but one
                 that does not reveal the private information of
                 contributors to the data. We present PSynDB, a
                 web-based synthetic table generator that is built on
                 recent privacy technologies [10,11,15]. PSynDB
                 satisfies the formal guarantee of differential privacy
                 and generates synthetic tables with high accuracy for
                 tasks that the user specifies as important. PSynDB
                 allows users to browse expected error rates before
                 running the mechanism, a useful feature for making
                 important policy decisions, such as setting the privacy
                 loss budget. When the user has finished configuration,
                 the tool outputs a data synthesis program that can be
                 ported to a trusted environment. There it can be safely
                 executed on the private data to produce the private
                 synthetic dataset for broad dissemination.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chandramouli:2019:FFI,
  author =       "Badrish Chandramouli and Dong Xie and Yinan Li and
                 Donald Kossmann",
  title =        "{FishStore}: fast ingestion and indexing of raw data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1922--1925",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352100",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The last decade has witnessed a huge increase in data
                 being ingested into the cloud from a variety of data
                 sources. The ingested data takes various forms such as
                 JSON, CSV, and binary formats. Traditionally, data is
                 either ingested into storage in raw form, indexed
                 ad-hoc using range indices, or cooked into
                 analytics-friendly columnar formats. None of these
                 solutions is able to handle modern requirements on
                 storage: making the data available immediately for
                 ad-hoc and streaming queries while ingesting at
                 extremely high throughputs. We demonstrate FishStore,
                 our open-source concurrent latch-free storage layer for
                 data with flexible schema. FishStore builds on recent
                 advances in parsing and indexing techniques, and is
                 based on multi-chain hash indexing of dynamically
                 registered predicated subsets of data. We find
                 predicated subset hashing to be a powerful primitive
                 that supports a broad range of queries on ingested data
                 and admits a higher performance (by up to an order of
                 magnitude) implementation than current alternatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Diao:2019:SMF,
  author =       "Yanlei Diao and Pawe{\l} Guzewicz and Ioana Manolescu
                 and Mirjana Mazuran",
  title =        "{Spade}: a modular framework for analytical
                 exploration of {RDF} graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1926--1929",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352101",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "RDF data is complex; exploring it is hard, and can be
                 done through many different metaphors. We have
                 developed and propose to demonstrate Spade, a tool
                 helping users discover meaningful content of an RDF
                 graph by showing them the results of aggregation
                 (OLAP-style) queries automatically identified from the
                 data. Spade chooses aggregates that are visually
                 interesting, a property formally based on statistic
                 properties of the aggregation query results. While well
                 understood for relational data, such exploration raises
                 multiple challenges for RDF: facts, dimensions and
                 measures have to be identified (as opposed to known
                 beforehand); as there are more candidate aggregates,
                 assessing their interestingness can be very costly;
                 finally, ontologies bring novel specific challenges but
                 also novel opportunities, enabling ontology-driven
                 exploration from an aggregate initially proposed by the
                 system. Spade is a generic, extensible framework, which
                 we instantiated with: (i) novel methods for enumerating
                 candidate measures and dimensions in the vast space of
                 possibilities provided by an RDF graph; (ii) a set of
                 aggregate interestingness functions; (iii)
                 ontology-based interactive exploration, and (iv)
                 efficient early-stop techniques for estimating the
                 interestingness of an aggregate query. The
                 demonstration will comprise interactive scenarios on a
                 variety of large, interesting RDF graphs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dsilva:2019:MRD,
  author =       "Joseph Vinish D'silva and Florestan {De Moor} and
                 Bettina Kemme",
  title =        "Making an {RDBMS} data scientist friendly: advanced
                 in-database interactive analytics with visualization
                 support",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1930--1933",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352102",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We are currently witnessing the rapid evolution and
                 adoption of various data science frameworks that
                 function external to the database. Any support from
                 conventional RDBMS implementations for data science
                 applications has been limited to procedural paradigms
                 such as user-defined functions (UDFs) that lack
                 exploratory programming support. Therefore, the current
                 status quo is that during the exploratory phase, data
                 scientists usually use the database system as the
                 ``data storage'' layer of the data science framework,
                 whereby the majority of computation and analysis is
                 performed outside the database, e.g., at the client
                 node. We demonstrate AIDA, an in-database framework for
                 data scientists. AIDA allows users to write interactive
                 Python code using a development environment such as a
                 Jupyter notebook. The actual execution itself takes
                 place inside the database (near-data), where a server
                 component of AIDA, that resides inside the embedded
                 Python interpreter of the RDBMS, manages the data sets
                 and computations. The demonstration will also show the
                 visualization capabilities of AIDA where the progress
                 of computation can be observed through live updates.
                 Our evaluations show that AIDA performs several times
                 faster compared to contemporary external data science
                 frameworks, but is much easier to use for exploratory
                 development compared to database UDFs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zaouk:2019:UNG,
  author =       "Khaled Zaouk and Fei Song and Chenghao Lyu and Arnab
                 Sinha and Yanlei Diao and Prashant Shenoy",
  title =        "{UDAO}: a next-generation unified data analytics
                 optimizer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1934--1937",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352103",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Big data analytics systems today still lack the
                 ability to take user performance goals and budgetary
                 constraints, collectively referred to as
                 ``objectives'', and automatically configure an analytic
                 job to achieve the objectives. This paper presents
                 UDAO, a unified data analytics optimizer that can
                 automatically determine the parameters of the runtime
                 system, collectively called a job configuration, for
                 general dataflow programs based on user objectives.
                 UDAO embodies key techniques including in-situ
                 modeling, which learns a model for each user objective
                 in the same computing environment as the job is run,
                 and multi-objective optimization, which computes a
                 Pareto optimal set of job configurations to reveal
                 tradeoffs between different objectives. Using
                 benchmarks developed based on industry needs, our
                 demonstration will allow the user to explore (1)
                 learned models to gain insights into how various
                 parameters affect user objectives; (2) Pareto frontiers
                 to understand interesting tradeoffs between different
                 objectives and how a configuration recommended by the
                 optimizer explores these tradeoffs; (3) end-to-end
                 benefits that UDAO can provide over default
                 configurations or those manually tuned by engineers.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jo:2019:AFC,
  author =       "Saehan Jo and Immanuel Trummer and Weicheng Yu and
                 Xuezhi Wang and Cong Yu and Daniel Liu and Niyati
                 Mehta",
  title =        "{AggChecker}: a fact-checking system for text
                 summaries of relational data sets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1938--1941",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352104",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate AggChecker, a novel tool for verifying
                 textual summaries of relational data sets. The system
                 automatically verifies natural language claims about
                 numerical aggregates against the underlying raw data.
                 The system incorporates a combination of natural
                 language processing, information retrieval, machine
                 learning, and efficient query processing strategies.
                 Each claim is translated into a semantically equivalent
                 SQL query and evaluated against the database. Our
                 primary goal is analogous to that of a spell-checker:
                 to identify erroneous claims and provide guidance in
                 correcting them. In this demonstration, we show that
                 our system enables users to verify text summaries much
                 more efficiently than a standard SQL interface.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2019:GIG,
  author =       "Hanzhang Wang and Phuong Nguyen and Jun Li and Selcuk
                 Kopru and Gene Zhang and Sanjeev Katariya and Sami
                 Ben-Romdhane",
  title =        "{GRANO}: interactive graph-based root cause analysis
                 for cloud-native distributed data platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1942--1945",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352105",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We demonstrate Grano, an end-to-end anomaly
                 detection and root cause analysis (or RCA for short)
                 system for cloud-native distributed data platform by
                 providing a holistic view of the system component
                 topology, alarms and application events. Grano
                 provides: a Detection Layer to process large amount of
                 time-series monitoring data to detect anomalies at
                 logical and physical system components; an Anomaly
                 Graph Layer with novel graph modeling and algorithms
                 for leveraging system topology data and detection
                 results to identify the root cause relevance at the
                 system component level; and an Application Layer that
                 automatically notifies on-call personnel and presents
                 real-time and on-demand RCA support through an
                 interactive graph interface. The system is deployed and
                 evaluated using eBay's production data to help on-call
                 personnel to shorten the identification of root cause
                 from hours to minutes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Frey:2019:DHB,
  author =       "Davide Frey and Marc X. Makkes and Pierre-Louis Roman
                 and Fran{\c{c}}ois Ta{\"\i}ani and Spyros Voulgaris",
  title =        "{Dietcoin}: hardening {Bitcoin} transaction
                 verification process for mobile devices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1946--1949",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352106",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed ledgers are among the most replicated data
                 repositories in the world. They offer data consistency,
                 immutability, and auditability, based on the assumption
                 that each participating node locally verifies their
                 entire content. Although their content, currently
                 extending up to a few hundred gigabytes, can be
                 accommodated by dedicated commodity hard disks,
                 downloading it, processing it, and storing it in
                 general-purpose desktop and laptop computers can prove
                 largely impractical. Even worse, this becomes a
                 prohibitive restriction for smartphones, mobile
                 devices, and resource-constrained IoT devices. In this
                 demo, we present an implementation of Dietcoin, a
                 Bitcoin protocol extension that allows nodes to perform
                 secure local verification of Bitcoin transactions with
                 small bandwidth and storage requirements. This demo
                 presents and benchmarks the main features of Dietcoin
                 that are important for today's cryptocurrencies and
                 smart contract systems, but are missing in the current
                 state-of-the-art: (i) allowing resource-constrained
                 devices to verify the correctness of selected blocks
                 locally without having to download the complete ledger;
                 (ii) enabling devices to join a blockchain quickly yet
                 securely, dropping bootstrap time from days down to a
                 matter of seconds; (iii) providing a generic solution
                 that can be applied to other distributed ledgers
                 secured with Proof-of-Work.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Singla:2019:RLS,
  author =       "Samriddhi Singla and Ahmed Eldawy and Rami Alghamdi
                 and Mohamed F. Mokbel",
  title =        "{Raptor}: large scale analysis of big raster and
                 vector data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1950--1953",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352107",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the increase in amount of remote sensing data,
                 there have been efforts to efficiently process it to
                 help ecologists and geographers answer queries.
                 However, they often need to process this data in
                 combination with vector data, for example, city
                 boundaries. Existing efforts require one dataset to be
                 converted to the other representation, which is
                 extremely inefficient for large datasets. In this
                 demonstration, we focus on the zonal statistics
                 problem, which computes the statistics over a raster
                 layer for each polygon in a vector layer. We
                 demonstrate three approaches, vector-based,
                 raster-based, and raptor-based approaches. The latter
                 is a recent effort of combining raster and vector data
                 without a need of any conversion. This demo will allow
                 users to run their own queries in any of the three
                 methods and observe the differences in their
                 performance depending on different raster and vector
                 dataset sizes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rezig:2019:DCH,
  author =       "El Kindi Rezig and Lei Cao and Michael Stonebraker and
                 Giovanni Simonini and Wenbo Tao and Samuel Madden and
                 Mourad Ouzzani and Nan Tang and Ahmed K. Elmagarmid",
  title =        "{Data Civilizer 2.0}: a holistic framework for data
                 preparation and analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1954--1957",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352108",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data scientists spend over 80\% of their time (1)
                 parameter-tuning machine learning models and (2)
                 iterating between data cleaning and machine learning
                 model execution. While there are existing efforts to
                 support the first requirement, there is currently no
                 integrated workflow system that couples data cleaning
                 and machine learning development. The previous version
                 of Data Civilizer was geared towards data cleaning and
                 discovery using a set of pre-defined tools. In this
                 paper, we introduce Data Civilizer 2.0, an end-to-end
                 workflow system satisfying both requirements. In
                 addition, this system also supports a sophisticated
                 data debugger and a workflow visualization system. In
                 this demo, we will show how we used Data Civilizer 2.0
                 to help scientists at the Massachusetts General
                 Hospital build their cleaning and machine learning
                 pipeline on their 30TB brain activity dataset.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Spiegelberg:2019:TRE,
  author =       "Leonhard F. Spiegelberg and Tim Kraska",
  title =        "{Tuplex}: robust, efficient analytics when {Python}
                 rules",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1958--1961",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352109",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Spark became the de facto industry standard as an
                 execution engine for data preparation, cleaning,
                 distributed machine learning, streaming, and
                 warehousing over raw data. However, with the success of
                 Python the landscape is shifting again; there is a
                 strong demand for tools which better integrate with the
                 Python landscape and do not have the impedance mismatch
                 like Spark. In this paper, we demonstrate Tuplex (short
                 for tuples and exceptions), a Python-native data
                 preparation framework that allows users to develop and
                 deploy pipelines faster and more robustly while
                 providing bare-metal execution times through code
                 compilation whenever possible.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Renggli:2019:EMC,
  author =       "Cedric Renggli and Frances Ann Hubis and Bojan Karlas
                 and Kevin Schawinski and Wentao Wu and Ce Zhang",
  title =        "{Ease.ml\slash ci} and {Ease.ml\slash meter} in
                 action: towards data management for statistical
                 generalization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1962--1965",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352110",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Developing machine learning (ML) applications is
                 similar to developing traditional software --- it is
                 often an iterative process in which developers navigate
                 within a rich space of requirements, design decisions,
                 implementations, empirical quality, and performance. In
                 traditional software development, software engineering
                 is the field of study which provides principled
                 guidelines for this iterative process. However, as of
                 today, the counterpart of ``software engineering for
                 ML'' is largely missing --- developers of ML
                 applications are left with powerful tools (e.g.,
                 TensorFlow and PyTorch) but little guidance regarding
                 the development lifecycle itself. In this paper, we
                 view the management of ML development life-cycles from
                 a data management perspective. We demonstrate two
                 closely related systems, ease.ml/ci and ease.ml/meter,
                 that provide some ``principled guidelines'' for ML
                 application development: ci is a continuous integration
                 engine for ML models and meter is a ``profiler'' for
                 controlling overfitting of ML models. Both systems
                 focus on managing the ``statistical generalization
                 power'' of datasets used for assessing the quality of
                 ML applications, namely, the validation set and the
                 test set. By demonstrating these two systems we hope to
                 spawn further discussions within our community on
                 building this new type of data management systems for
                 statistical generalization.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Han:2019:PRV,
  author =       "Xueran Han and Jun Chen and Jiaheng Lu and Yueguo Chen
                 and Xiaoyong Du",
  title =        "{PivotE}: revealing and visualizing the underlying
                 entity structures for exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1966--1969",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352111",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "A Web-scale knowledge graph (KG) typically contains
                 millions of entities and thousands of entity types. Due
                 to the lack of a pre-defined data schema such as the ER
                 model, entities in KGs are loosely coupled based on
                 their relationships, which brings challenges for
                 effective accesses of the KGs in a structured manner
                 like SPARQL. This demonstration presents an
                 entity-oriented exploratory search prototype system
                 that is able to support search and explore KGs in an
                 exploratory search manner, where local structures of
                 KGs can be dynamically discovered and utilized for
                 guiding users. The system applies a path-based ranking
                 method for recommending similar entities and their
                 relevant information as exploration pointers. The
                 interface is designed to assist users to investigate a
                 domain (particular type) of entities, as well as to
                 explore the knowledge graphs in various relevant
                 domains. The queries are dynamically formulated by
                 tracing the users' dynamic clicking (exploration)
                 behaviors. In this demonstration, we will show how our
                 system visualizes the underlying entity structures, as
                 well as explains the semantic correlations among them in
                 a unified interface, which not only assists users to
                 learn about the properties of entities in many aspects
                 but also guide them to further explore the information
                 space.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2019:SYA,
  author =       "Jiaheng Lu and Yuxing Chen and Herodotos Herodotou and
                 Shivnath Babu",
  title =        "Speedup your analytics: automatic parameter tuning for
                 databases and big data systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1970--1973",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352112",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database and big data analytics systems such as Hadoop
                 and Spark have a large number of configuration
                 parameters that control memory distribution, I/O
                 optimization, parallelism, and compression. Improper
                 parameter settings can cause significant performance
                 degradation and stability issues. However, regular
                 users and even expert administrators struggle to
                 understand and tune them to achieve good performance.
                 In this tutorial, we review existing approaches on
                 automatic parameter tuning for databases, Hadoop, and
                 Spark, which we classify into six categories:
                 rule-based, cost modeling, simulation-based,
                 experiment-driven, machine learning, and adaptive
                 tuning. We describe the foundations of different
                 automatic parameter tuning algorithms and present pros
                 and cons of each approach. We also highlight real-world
                 applications and systems, and identify research
                 challenges for handling cloud services, resource
                 heterogeneity, and real-time analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Meng:2019:TAC,
  author =       "Yu Meng and Jiaxin Huang and Jingbo Shang and Jiawei
                 Han",
  title =        "{TextCube}: automated construction and
                 multidimensional exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1974--1977",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352113",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Today's society is immersed in a wealth of text data,
                 ranging from news articles, to social media, research
                 literature, medical records, and corporate reports. A
                 grand challenge of data science and engineering is to
                 develop effective and scalable methods to extract
                 structures and knowledge from massive text data to
                 satisfy diverse applications, without extensive,
                 corpus-specific human annotations. In this tutorial, we
                 show that TextCube provides a critical information
                 organization structure that will satisfy such an
                 information need. We overview a set of recently
                 developed data-driven methods that facilitate automated
                 construction of TextCubes from massive, domain-specific
                 text corpora, and show that TextCubes so constructed
                 will enhance text exploration and analysis for various
                 applications. We focus on new TextCube construction
                 methods that are scalable, weakly-supervised,
                 domain-independent, language-agnostic, and effective
                 (i.e., generating quality TextCubes from large corpora
                 of various domains). We will demonstrate with real
                 datasets (including news articles, scientific
                 publications, and product reviews) on how TextCubes can
                 be constructed to assist multidimensional analysis of
                 massive text corpora.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Amer-Yahia:2019:EEO,
  author =       "Sihem Amer-Yahia and Senjuti Basu Roy",
  title =        "The ever evolving online labor market: overview,
                 challenges and opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1978--1981",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352114",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The goal of this tutorial is to make the audience
                 aware of various discipline-specific research
                 activities that could be characterized to be part of
                 online labor markets and advocate for a unified
                 framework that is interdisciplinary in nature and
                 requires convergence of different research disciplines.
                 We will discuss how such a framework could bring
                 transformative effect on the nexus of humans,
                 technology, and the future of work.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sabek:2019:MLM,
  author =       "Ibrahim Sabek and Mohamed F. Mokbel",
  title =        "Machine learning meets big spatial data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1982--1985",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352115",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The proliferation in amounts of generated data has
                 propelled the rise of scalable machine learning
                 solutions to efficiently analyze and extract useful
                 insights from such data. Meanwhile, spatial data has
                 become ubiquitous, e.g., GPS data, with increasingly
                 sheer sizes in recent years. The applications of big
                 spatial data span a wide spectrum of interests
                 including tracking infectious disease, climate change
                 simulation, drug addiction, among others. Consequently,
                 major research efforts are exerted to support efficient
                 analysis and intelligence inside these applications by
                 either providing spatial extensions to existing machine
                 learning solutions or building new solutions from
                 scratch. In this 90-minutes tutorial, we
                 comprehensively review the state-of-the-art work in the
                 intersection of machine learning and big spatial data.
                 We cover existing research efforts and challenges in
                 three major areas of machine learning, namely, data
                 analysis, deep learning and statistical inference, as
                 well as two advanced spatial machine learning tasks,
                 namely, spatial features extraction and spatial
                 sampling. We also highlight open problems and
                 challenges for future research in this area.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Nargesian:2019:DLM,
  author =       "Fatemeh Nargesian and Erkang Zhu and Ren{\'e}e J.
                 Miller and Ken Q. Pu and Patricia C. Arocena",
  title =        "Data lake management: challenges and opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1986--1989",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352116",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The ubiquity of data lakes has created fascinating new
                 challenges for data management research. In this
                 tutorial, we review the state-of-the-art in data
                 management for data lakes. We consider how data lakes
                 are introducing new problems including dataset
                 discovery and how they are changing the requirements
                 for classic problems including data extraction, data
                 cleaning, data integration, data versioning, and
                 metadata management.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lakshmanan:2019:CFN,
  author =       "Laks V. S. Lakshmanan and Michael Simpson and
                 Saravanan Thirumuruganathan",
  title =        "Combating fake news: a data management and mining
                 perspective",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1990--1993",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352117",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Fake news is a major threat to global democracy
                 resulting in diminished trust in government, journalism
                 and civil society. The public popularity of social
                 media and social networks has caused a contagion of
                 fake news where conspiracy theories, disinformation and
                 extreme views flourish. Detection and mitigation of
                 fake news is one of the fundamental problems of our
                 times and has attracted widespread attention. While
                 fact checking websites such as snopes, politifact and
                 major companies such as Google, Facebook, and Twitter
                 have taken preliminary steps towards addressing fake
                 news, much more remains to be done. As an
                 interdisciplinary topic, various facets of fake news
                 have been studied by communities as diverse as machine
                 learning, databases, journalism, political science and
                 many more. The objective of this tutorial is two-fold.
                 First, we wish to familiarize the database community
                 with the efforts by other communities on combating fake
                 news. We provide a panoramic view of the
                 state-of-the-art of research on various aspects
                 including detection, propagation, mitigation, and
                 intervention of fake news. Next, we provide a concise
                 and intuitive summary of prior research by the database
                 community and discuss how it could be used to
                 counteract fake news. The tutorial covers research from
                 areas such as data integration, truth discovery and
                 fusion, probabilistic databases, knowledge graphs and
                 crowdsourcing from the lens of fake news. Effective
                 tools for addressing fake news could only be built by
                 leveraging the synergistic relationship between
                 database and other research communities. We hope that
                 our tutorial provides an impetus towards such synthesis
                 of ideas and the creation of new ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Anciaux:2019:PDS,
  author =       "Nicolas Anciaux and Luc Bouganim and Philippe Pucheral
                 and Iulian Sandu Popa and Guillaume Scerri",
  title =        "Personal database security and trusted execution
                 environments: a tutorial at the crossroads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1994--1997",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352118",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Smart disclosure initiatives and new regulations such
                 as GDPR in the EU increase the interest for Personal
                 Data Management Systems (PDMS) being provided to
                 individuals to preserve their entire digital life.
                 Consequently, the thorny issue of data security becomes
                 more and more prominent, but highly differs from
                 traditional privacy issues in outsourced corporate
                 databases. Concurrently, the emergence of Trusted
                 Execution Environments (TEE) changes the game in
                 privacy-preserving data management with novel security
                 models. This tutorial offers a global perspective of
                 the current state of work at the confluence of these
                 two rapidly growing areas. The goal is threefold: (1)
                 review and categorize PDMS solutions and identify
                 existing privacy threats and countermeasures; (2)
                 review new security models capitalizing on TEEs and
                 related privacy-preserving data management solutions
                 relevant to the personal context; (3) discuss new
                 challenges at the intersection of PDMS security and
                 TEE-based data management.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kessler:2019:SHG,
  author =       "Stephan Kessler and Jens Hoff and Johann-Christoph
                 Freytag",
  title =        "{SAP HANA} goes private: from privacy research to
                 privacy aware enterprise analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "1998--2009",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352119",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Over the last 20 years, the progress of information
                 technology has allowed many companies to generate,
                 integrate, store, and analyze data of unprecedented
                 size and complexity. In many cases, this data is
                 personal data and how it can be used is therefore
                 subject to laws that depend on the specific countries
                 and application domains. For example, the General Data
                 Protection Regulation (GDPR) introduced in the European
                 Union imposes strict rules on how personal data can be
                 processed. Analyzing personal data can create
                 tremendous value, but at the same time companies must
                 ensure that they remain legally compliant.
                 Unfortunately, existing systems offer only limited or
                 no support at all for processing personal data in a
                 privacy-aware manner. Approaches that have emerged from
                 the academic and industrial research environments need
                 to be integrated into large systems (like enterprise
                 systems) in a manageable and scalable way. In many IT
                 environments, it is also desirable and necessary to
                 combine and to integrate personal data with other
                 (non-personal) data in a seamless fashion. In this
                 paper, we present the first steps that SAP has taken to
                 provide its database management system SAP HANA with
                 privacy-enhanced processing capabilities, referred to
                 in the following as SAP HANA Data Anonymization.
                 Various goals on both the conceptual and technical
                 levels were followed with the aim of providing SAP
                 customers today with an integrated processing
                 environment for personal and non-personal data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Damasio:2019:GAL,
  author =       "Guilherme Damasio and Vincent Corvinelli and Parke
                 Godfrey and Piotr Mierzejewski and Alex Mihaylov and
                 Jaroslaw Szlichta and Calisto Zuzarte",
  title =        "Guided automated learning for query workload
                 re-optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2010--2021",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352120",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Query optimization is a hallmark of database systems.
                 When an SQL query runs more expensively than is viable
                 or warranted, determination of the performance issues
                 is usually performed manually in consultation with
                 experts through the analysis of query's execution plan
                 (QEP). However, this is an excessively time consuming,
                 human error-prone, and costly process. GALO is a novel
                 system that automates this process. The tool
                 automatically learns recurring problem patterns in
                 query plans over workloads in an offline learning
                 phase, to build a knowledge base of plan-rewrite
                 remedies. It then uses the knowledge base online to
                 re-optimize queries often quite drastically. GALO's
                 knowledge base is built on RDF and SPARQL, W3C graph
                 database standards, which is well suited for
                 manipulating and querying over SQL query plans, which
                 are graphs themselves. GALO acts as a third-tier of
                 re-optimization, after query rewrite and cost-based
                 optimization, as a query plan rewrite. For generality,
                 the context of knowledge base problem patterns,
                 including table and column names, is abstracted with
                 canonical symbol labels. Since the knowledge base is
                 not tied to the context of supplied QEPs, table and
                 column names are matched automatically during the
                 re-optimization phase. Thus, problem patterns learned
                 over a particular query workload can be applied in
                 other query workloads. GALO's knowledge base is also an
                 invaluable tool for database experts to debug query
                 performance issues by tracking to known issues and
                 solutions as well as refining the optimizer with new
                 tuned techniques by the development team. We
                 demonstrate an experimental study of the effectiveness
                 of our techniques over synthetic TPC-DS and real IBM
                 client query workloads.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chattopadhyay:2019:PUS,
  author =       "Biswapesh Chattopadhyay and Priyam Dutta and Weiran
                 Liu and Ott Tinn and Andrew Mccormick and Aniket
                 Mokashi and Paul Harvey and Hector Gonzalez and David
                 Lomax and Sagar Mittal and Roee Ebenstein and Nikita
                 Mikhaylin and Hung-ching Lee and Xiaoyan Zhao and Tony
                 Xu and Luis Perez and Farhad Shahmohammadi and Tran Bui
                 and Neil McKay and Selcuk Aya and Vera Lychagina and
                 Brett Elliott",
  title =        "{Procella}: unifying serving and analytical data at
                 {YouTube}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2022--2034",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352121",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Large organizations like YouTube are dealing with
                 exploding data volume and increasing demand for data
                 driven applications. Broadly, these can be categorized
                 as: reporting and dashboarding, embedded statistics in
                 pages, time-series monitoring, and ad-hoc analysis.
                 Typically, organizations build specialized
                 infrastructure for each of these use cases. This,
                 however, creates silos of data and processing, and
                 results in a complex, expensive, and harder to maintain
                 infrastructure. At YouTube, we solved this problem by
                 building a new SQL query engine --- Procella. Procella
                 implements a superset of capabilities required to
                 address all of the four use cases above, with high
                 scale and performance, in a single product. Today,
                 Procella serves hundreds of billions of queries per day
                 across all four workloads at YouTube and several other
                 Google product areas.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lu:2019:LET,
  author =       "Wei Lu and Zhanhao Zhao and Xiaoyu Wang and Haixiang
                 Li and Zhenmiao Zhang and Zhiyu Shui and Sheng Ye and
                 Anqun Pan and Xiaoyong Du",
  title =        "A lightweight and efficient temporal database
                 management system in {TDSQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2035--2046",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352122",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Driven by the recent adoption of temporal expressions
                 into SQL:2011, extensions of temporal support in
                 conventional database management systems (a.k.a. DBMSs)
                 have re-emerged as a research hotspot. In this paper,
                 we present a lightweight yet efficient built-in
                 temporal implementation in Tencent's distributed
                 database management system, namely TDSQL. The novelty
                 of TDSQL's temporal implementation includes: (1) a new
                 temporal data model with the extension of SQL:2011, (2)
                 a built-in temporal implementation with various
                 optimizations, which are also applicable to other
                 DBMSs, and (3) a low-storage-consumption in which only
                 data changes are maintained. For the repeatability
                 purpose, we elaborate the integration of our proposed
                 techniques into MySQL. We conduct extensive experiments
                 on both real-life dataset and synthetic TPC benchmarks
                 by comparing TDSQL with other temporal databases. The
                 results show that TDSQL is lightweight and efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sherkat:2019:NSE,
  author =       "Reza Sherkat and Colin Florendo and Mihnea Andrei and
                 Rolando Blanco and Adrian Dragusanu and Amit Pathak and
                 Pushkar Khadilkar and Neeraj Kulkarni and Christian
                 Lemke and Sebastian Seifert and Sarika Iyer and
                 Sasikanth Gottapu and Robert Schulze and Chaitanya
                 Gottipati and Nirvik Basak and Yanhong Wang and Vivek
                 Kandiyanallur and Santosh Pendap and Dheren Gala and
                 Rajesh Almeida and Prasanta Ghosh",
  title =        "Native store extension for {SAP HANA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2047--2058",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352123",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present an overview of SAP HANA's Native Store
                 Extension (NSE). This extension substantially increases
                 database capacity, allowing to scale far beyond
                 available system memory. NSE is based on a hybrid
                 in-memory and paged column store architecture composed
                 from data access primitives. These primitives enable
                 the processing of hybrid columns using the same
                 algorithms optimized for traditional HANA's in-memory
                 columns. Using only three key primitives, we fabricated
                 byte-compatible counterparts for complex memory
                 resident data structures (e.g. dictionary and
                 hash-index), compressed schemes (e.g. sparse and
                 run-length encoding), and exotic data types (e.g.
                 geo-spatial). We developed a new buffer cache which
                 optimizes the management of paged resources by smart
                 strategies sensitive to page type and access patterns.
                 The buffer cache integrates with HANA's new execution
                 engine that issues pipelined prefetch requests to
                 improve disk access patterns. A novel load unit
                 configuration, along with a unified persistence format,
                 allows the hybrid column store to dynamically switch
                 between in-memory and paged data access to balance
                 performance and storage economy according to
                 application demands while reducing Total Cost of
                 Ownership (TCO). A new partitioning scheme supports
                 load unit specification at table, partition, and column
                 level. Finally, a new advisor recommends optimal load
                 unit configurations. Our experiments illustrate the
                 performance and memory footprint improvements on
                 typical customer scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhan:2019:ART,
  author =       "Chaoqun Zhan and Maomeng Su and Chuangxian Wei and
                 Xiaoqiang Peng and Liang Lin and Sheng Wang and Zhe
                 Chen and Feifei Li and Yue Pan and Fang Zheng and
                 Chengliang Chai",
  title =        "{AnalyticDB}: real-time {OLAP} database system at
                 {Alibaba} cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2059--2070",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352124",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With data explosion in scale and variety, OLAP
                 databases play an increasingly important role in
                 serving real-time analysis with low latency (e.g.,
                 hundreds of milliseconds), especially when incoming
                 queries are complex and ad hoc in nature. Moreover,
                 these systems are expected to provide high query
                 concurrency and write throughput, and support queries
                 over structured and complex data types (e.g., JSON,
                 vector and texts). In this paper, we introduce
                 AnalyticDB, a real-time OLAP database system developed
                 at Alibaba. AnalyticDB maintains all-column indexes in
                 an asynchronous manner with acceptable overhead, which
                 provides low latency for complex ad-hoc queries. Its
                 storage engine extends hybrid row-column layout for
                 fast retrieval of both structured data and data of
                 complex types. To handle large-scale data with high
                 query concurrency and write throughput, AnalyticDB
                 decouples read and write access paths. To further
                 reduce query latency, novel storage-aware SQL optimizer
                 and execution engine are developed to fully utilize the
                 advantages of the underlying storage and indexes.
                 AnalyticDB has been successfully deployed on Alibaba
                 Cloud to serve numerous customers (both large and
                 small). It is capable of holding 100 trillion rows of
                 records, i.e., 10PB+ in size. At the same time, it is
                 able to serve 10m+ writes and 100k+ queries per second,
                 while completing complex queries within hundreds of
                 milliseconds.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Schultz:2019:TCM,
  author =       "William Schultz and Tess Avitabile and Alyson Cabral",
  title =        "Tunable consistency in {MongoDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2071--2081",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352125",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed databases offer high availability but can
                 impose high costs on client applications in order to
                 maintain strong consistency at all times. MongoDB is a
                 document oriented database whose replication system
                 provides a variety of consistency levels allowing
                 client applications to select the trade-offs they want
                 to make when it comes to consistency and latency, at a
                 per operation level. In this paper we discuss the
                 tunable consistency models in MongoDB replication and
                 their utility for application developers. We discuss
                 how the MongoDB replication system's speculative
                 execution model and data rollback protocol help make
                 this spectrum of consistency levels possible. We also
                 present case studies of how these consistency levels
                 are used in real world applications, along with a
                 characterization of their performance benefits and
                 trade-offs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2019:TOR,
  author =       "Shaosheng Cao and XinXing Yang and Cen Chen and Jun
                 Zhou and Xiaolong Li and Yuan Qi",
  title =        "{TitAnt}: online real-time transaction fraud detection
                 in {Ant Financial}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2082--2093",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352126",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the explosive growth of e-commerce and the
                 booming of e-payment, detecting online transaction
                 fraud in real time has become increasingly important to
                 Fintech business. To tackle this problem, we introduce
                 the TitAnt, a transaction fraud detection system
                 deployed in Ant Financial, one of the largest Fintech
                 companies in the world. The system is able to predict
                 online real-time transaction fraud in mere
                 milliseconds. We present the problem definition,
                 feature extraction, detection methods, implementation
                 and deployment of the system, as well as empirical
                 effectiveness. Extensive experiments have been
                 conducted on large real-world transaction data to show
                 the effectiveness and the efficiency of the proposed
                 system.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2019:ACG,
  author =       "Rong Zhu and Kun Zhao and Hongxia Yang and Wei Lin and
                 Chang Zhou and Baole Ai and Yong Li and Jingren Zhou",
  title =        "{AliGraph}: a comprehensive graph neural network
                 platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2094--2105",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352127",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "An increasing number of machine learning tasks require
                 dealing with large graph datasets, which capture rich
                 and complex relationship among potentially billions of
                 elements. Graph Neural Network (GNN) becomes an
                 effective way to address the graph learning problem by
                 converting the graph data into a low dimensional space
                 while keeping both the structural and property
                 information to the maximum extent and constructing a
                 neural network for training and referencing. However,
                 it is challenging to provide an efficient graph storage
                 and computation capabilities to facilitate GNN training
                 and enable development of new GNN algorithms. In this
                 paper, we present a comprehensive graph neural network
                 system, namely AliGraph, which consists of distributed
                 graph storage, optimized sampling operators and runtime
                 to efficiently support not only existing popular GNNs
                 but also a series of in-house developed ones for
                 different scenarios. The system is currently deployed
                 at Alibaba to support a variety of business scenarios,
                 including product recommendation and personalized
                 search at Alibaba's E-Commerce platform. By conducting
                 extensive experiments on a real-world dataset with
                 492.90 million vertices, 6.82 billion edges and rich
                 attributes, AliGraph performs an order of magnitude
                 faster in terms of graph building (5 minutes vs hours
                 reported from the state-of-the-art PowerGraph
                  platform). At training, AliGraph runs 40\%--50\% faster
                 with the novel caching strategy and demonstrates around
                 12 times speed up with the improved runtime. In
                 addition, our in-house developed GNN models all
                 showcase their statistically significant superiorities
                 in terms of both effectiveness and efficiency (e.g.,
                 4.12\%--17.19\% lift by F1 scores).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chen:2019:CSF,
  author =       "Zhimin Chen and Yue Wang and Vivek Narasayya and
                 Surajit Chaudhuri",
  title =        "Customizable and scalable fuzzy join for big data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2106--2117",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352128",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Fuzzy join is an important primitive for data
                 cleaning. The ability to customize fuzzy join is
                 crucial to allow applications to address
                 domain-specific data quality issues such as synonyms
                 and abbreviations. While efficient indexing techniques
                 exist for single-node implementations of customizable
                 fuzzy join, the state-of-the-art scale-out techniques
                 do not support customization, and exhibit poor
                 performance and scalability characteristics. We
                 describe the design of a scale-out fuzzy join operator
                 that supports customization. We use a
                 locality-sensitive-hashing (LSH) based signature
                 scheme, and introduce optimizations that result in
                 significant speed up with negligible impact on recall.
                 We evaluate our implementation on the Azure Databricks
                 version of Spark using several real-world and synthetic
                 data sets. We observe speedups exceeding 50X compared
                 to the best-known prior scale-out technique, and close
                 to linear scalability with data size and number of
                 nodes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2019:QQA,
  author =       "Guoliang Li and Xuanhe Zhou and Shifu Li and Bo Gao",
  title =        "{QTune}: a query-aware database tuning system with
                 deep reinforcement learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2118--2130",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352129",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Database knob tuning is important to achieve high
                 performance (e.g., high throughput and low latency).
                 However, knob tuning is an NP-hard problem and existing
                 methods have several limitations. First, DBAs cannot
                 tune a lot of database instances on different
                 environments (e.g., different database vendors).
                 Second, traditional machine-learning methods either
                 cannot find good configurations or rely on a lot of
                 high-quality training examples which are rather hard to
                 obtain. Third, they only support coarse-grained tuning
                 (e.g., workload-level tuning) but cannot provide
                 fine-grained tuning (e.g., query-level tuning). To
                 address these problems, we propose a query-aware
                 database tuning system QTune with a deep reinforcement
                 learning (DRL) model, which can efficiently and
                 effectively tune the database configurations. QTune
                 first featurizes the SQL queries by considering rich
                 features of the SQL queries. Then QTune feeds the query
                 features into the DRL model to choose suitable
                 configurations. We propose a Double-State Deep
                 Deterministic Policy Gradient (DS-DDPG) model to enable
                 query-aware database configuration tuning, which
                 utilizes the actor-critic networks to tune the database
                 configurations based on both the query vector and
                 database states. QTune provides three database tuning
                 granularities: query-level, workload-level, and
                 cluster-level tuning. We deployed our techniques onto
                 three real database systems, and experimental results
                 show that QTune achieves high performance and
                 outperforms the state-of-the-art tuning methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kandula:2019:EAQ,
  author =       "Srikanth Kandula and Kukjin Lee and Surajit Chaudhuri
                 and Marc Friedman",
  title =        "Experiences with approximating queries in
                 {Microsoft}'s production big-data clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2131--2142",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352130",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With the rapidly growing volume of data, it is more
                 attractive than ever to leverage approximations to
                 answer analytic queries. Sampling is a powerful
                 technique which has been studied extensively from the
                 point of view of facilitating approximation. Yet, there
                 has been no large-scale study of effectiveness of
                 sampling techniques in big data systems. In this paper,
                 we describe an in-depth study of the sampling-based
                 approximation techniques that we have deployed in
                 Microsoft's big data clusters. We explain the choices
                 we made to implement approximation, identify the usage
                 cases, and study detailed data that sheds insight on
                 the usefulness of doing sampling based approximation.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Antonopoulos:2019:CTR,
  author =       "Panagiotis Antonopoulos and Peter Byrne and Wayne Chen
                 and Cristian Diaconu and Raghavendra Thallam
                 Kodandaramaih and Hanuma Kodavalla and Prashanth
                 Purnananda and Adrian-Leonard Radu and Chaitanya
                 Sreenivas Ravella and Girish Mittur Venkataramanappa",
  title =        "Constant time recovery in {Azure SQL} database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2143--2154",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352131",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Azure SQL Database and the upcoming release of SQL
                 Server introduce a novel database recovery mechanism
                 that combines traditional ARIES recovery with
                 multi-version concurrency control to achieve database
                 recovery in constant time, regardless of the size of
                 user transactions. Additionally, our algorithm enables
                 continuous transaction log truncation, even in the
                 presence of long running transactions, thereby allowing
                 large data modifications using only a small, constant
                 amount of log space. These capabilities are
                 particularly important for any Cloud database service
                 given (a) the constantly increasing database sizes, (b)
                 the frequent failures of commodity hardware, (c) the
                 strict availability requirements of modern, global
                 applications and (d) the fact that software upgrades
                 and other maintenance tasks are managed by the Cloud
                 platform, introducing unexpected failures for the
                 users. This paper describes the design of our recovery
                 algorithm and demonstrates how it allowed us to improve
                 the availability of Azure SQL Database by guaranteeing
                 consistent recovery times of under 3 minutes for
                 99.999\% of recovery cases in production.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Huang:2019:YGD,
  author =       "Yuzhen Huang and Yingjie Shi and Zheng Zhong and Yihui
                 Feng and James Cheng and Jiwei Li and Haochuan Fan and
                 Chao Li and Tao Guan and Jingren Zhou",
  title =        "{Yugong}: geo-distributed data and job placement at
                 scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2155--2169",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352132",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Companies like Alibaba operate tens of data centers
                 (DCs) across geographically distributed locations.
                 These DCs collectively provide the storage space and
                 computing power for the company, storing EBs of data
                 and serving millions of batch analytics jobs every day.
                 In Alibaba, as our businesses grow, there are more and
                 more cross-DC dependencies caused by jobs reading data
                 from remote DCs. Consequently, the precious wide area
                 network bandwidth becomes a major bottleneck for
                 operating geo-distributed DCs at scale. In this paper,
                 we present Yugong --- a system that manages data
                 placement and job placement in Alibaba's
                 geo-distributed DCs, with the objective to minimize
                 cross-DC bandwidth usage. Yugong uses three methods,
                 namely project placement, table replication, and job
                 outsourcing, to address the issues of high bandwidth
                 consumption across the DCs. We give the details of
                 Yugong's design and implementation for the three
                 methods, and describe how it cooperates with other
                 systems (e.g., Alibaba's big data analytics platform
                 and cluster scheduler) to improve the productivity of
                 the DCs. We also report comprehensive performance
                 evaluation results, which validate the design of Yugong
                 and show that significant reduction in cross-DC
                 bandwidth usage has been achieved.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tan:2019:CCD,
  author =       "Junjay Tan and Thanaa Ghanem and Matthew Perron and
                 Xiangyao Yu and Michael Stonebraker and David DeWitt
                 and Marco Serafini and Ashraf Aboulnaga and Tim
                 Kraska",
  title =        "Choosing a cloud {DBMS}: architectures and tradeoffs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2170--2182",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352133",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As analytic (OLAP) applications move to the cloud,
                 DBMSs have shifted from employing a pure shared-nothing
                 design with locally attached storage to a hybrid design
                 that combines the use of shared-storage (e.g., AWS S3)
                 with the use of shared-nothing query execution
                 mechanisms. This paper sheds light on the resulting
                 tradeoffs, which have not been properly identified in
                 previous work. To this end, it evaluates the TPC-H
                 benchmark across a variety of DBMS offerings running in
                 a cloud environment (AWS) on fast 10Gb+ networks,
                 specifically database-as-a-service offerings (Redshift,
                 Athena), query engines (Presto, Hive), and a
                 traditional cloud agnostic OLAP database (Vertica).
                 While these comparisons cannot be apples-to-apples in
                 all cases due to cloud configuration restrictions, we
                 nonetheless identify patterns and design choices that
                 are advantageous. These include prioritizing low-cost
                 object stores like S3 for data storage, using system
                 agnostic yet still performant columnar formats like ORC
                 that allow easy switching to other systems for
                 different workloads, and making features that benefit
                 subsequent runs like query precompilation and caching
                 remote data to faster storage optional rather than
                 required because they disadvantage ad hoc queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2019:SSM,
  author =       "Jingtian Zhang and Sai Wu and Zeyuan Tan and Gang Chen
                 and Zhushi Cheng and Wei Cao and Yusong Gao and Xiaojie
                 Feng",
  title =        "{S3}: a scalable in-memory skip-list index for
                 key--value store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2183--2194",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352134",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Many new memory indexing structures have been proposed
                 and outperform current in-memory skip-list index
                 adopted by LevelDB, RocksDB and other key--value
                 systems. However, those new indexes cannot be easily
                 integrated with key--value systems, because most of
                 them do not consider how the data can be efficiently
                 flushed to disk. Some assumptions, such as fixed size
                 key and value, are unrealistic for real applications.
                 In this paper, we present S3, a scalable in-memory
                 skip-list index for the customized version of RocksDB
                 in Alibaba Cloud. S3 adopts a two-layer structure. In
                 the top layer, a cache-sensitive structure is used to
                 maintain a few guard entries to facilitate the search
                 over the skip-list. In the bottom layer, a semi-ordered
                 skip-list index is built to support highly concurrent
                 insertions and fast lookup and range query. To further
                 improve the performance, we train a neural model to
                 select guard entries intelligently according to the
                 data distribution and query distribution. Experiments
                 on multiple datasets show that S3 achieves a comparable
                 performance to other new memory indexing schemes, and
                 can replace current in-memory skip-list of LevelDB and
                 RocksDB to support huge volume of data.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Masson:2019:DFF,
  author =       "Charles Masson and Jee E. Rim and Homin K. Lee",
  title =        "{DDSketch}: a fast and fully-mergeable quantile sketch
                 with relative-error guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2195--2205",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352135",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Summary statistics such as the mean and variance are
                 easily maintained for large, distributed data streams,
                 but order statistics (i.e., sample quantiles) can only
                 be approximately summarized. There is extensive
                 literature on maintaining quantile sketches where the
                 emphasis has been on bounding the rank error of the
                 sketch while using little memory. Unfortunately, rank
                 error guarantees do not preclude arbitrarily large
                 relative errors, and this often occurs in practice when
                 the data is heavily skewed. Given the distributed
                 nature of contemporary large-scale systems, another
                 crucial property for quantile sketches is
                  mergeability, i.e., several combined sketches must be
                 as accurate as a single sketch of the same data. We
                 present the first fully-mergeable, relative-error
                 quantile sketching algorithm with formal guarantees.
                 The sketch is extremely fast and accurate, and is
                 currently being used by Datadog at a wide-scale.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Long:2019:DSL,
  author =       "Qiang Long and Wei Wang and Jinfu Deng and Song Liu
                 and Wenhao Huang and Fangying Chen and Sifan Liu",
  title =        "A distributed system for large-scale $n$-gram language
                 models at {Tencent}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2206--2217",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352136",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "n-gram language models are widely used in language
                 processing applications, e.g., automatic speech
                 recognition, for ranking the candidate word sequences
                 generated from the generator model, e.g., the acoustic
                 model. Large n-gram models typically give good ranking
                 results; however, they require a huge amount of memory
                 storage. While distributing the model across multiple
                 nodes resolves the memory issue, it nonetheless incurs
                 a great network communication overhead and introduces a
                 different bottleneck. In this paper, we present our
                 distributed system developed at Tencent with novel
                 optimization techniques for reducing the network
                 overhead, including distributed indexing, batching and
                 caching. They reduce the network requests and
                 accelerate the operation on each single node. We also
                 propose a cascade fault-tolerance mechanism which
                 adaptively switches to small n-gram models depending on
                 the severity of the failure. Experimental study on 9
                 automatic speech recognition (ASR) datasets confirms
                 that our distributed system scales to large models
                 efficiently, effectively and robustly. We have
                 successfully deployed it for Tencent's WeChat ASR with
                 the peak network traffic at the scale of 100 millions
                 of messages per minute.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Dursun:2019:MDQ,
  author =       "Kayhan Dursun and Carsten Binnig and Ugur Cetintemel
                 and Garret Swart and Weiwei Gong",
  title =        "A morsel-driven query execution engine for
                 heterogeneous multi-cores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2218--2229",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352137",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Currently, we face the next major shift in processor
                 designs that arose from the physical limitations known
                 as the ``dark silicon effect''. Due to thermal
                 limitations and shrinking transistor sizes, multi-core
                 scaling is coming to an end. A major new direction that
                 hardware vendors are currently investigating involves
                 specialized and energy-efficient hardware accelerators
                 (e.g., ASICs) placed on the same die as the normal CPU
                 cores. In this paper, we present a novel query
                 processing engine called SiliconDB that targets such
                 heterogeneous processor environments. We leverage the
                 Sparc M7 platform to develop and test our ideas. Based
                 on the SSB benchmarks, as well as other micro
                 benchmarks, we compare the efficiency of SiliconDB with
                 existing execution strategies that make use of
                 co-processors (e.g., FPGAs, GPUs) and demonstrate
                 speed-up improvements of up to 2x.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cao:2019:SSS,
  author =       "Lei Cao and Wenbo Tao and Sungtae An and Jing Jin and
                 Yizhou Yan and Xiaoyu Liu and Wendong Ge and Adam Sah
                 and Leilani Battle and Jimeng Sun and Remco Chang and
                 Brandon Westover and Samuel Madden and Michael
                 Stonebraker",
  title =        "{Smile}: a system to support machine learning on {EEG}
                 data at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2230--2241",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352138",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In order to reduce the possibility of neural injury
                 from seizures and sidestep the need for a neurologist
                 to spend hours on manually reviewing the EEG recording,
                 it is critical to automatically detect and classify
                 ``interictal-ictal continuum'' (IIC) patterns from EEG
                 data. However, the existing IIC classification
                 techniques are shown to be not accurate and robust
                 enough for clinical use because of the lack of high
                 quality labels of EEG segments as training data.
                 Obtaining high-quality labeled data is traditionally a
                 manual process by trained clinicians that can be
                 tedious, time-consuming, and error-prone. In this work,
                 we propose Smile, an industrial scale system that
                 provides an end-to-end solution to the IIC pattern
                 classification problem. The core components of Smile
                 include a visualization-based time series labeling
                 module and a deep-learning based active learning
                 module. The labeling module enables the users to
                 explore and label 350 million EEG segments (30TB) at
                 interactive speed. The multiple coordinated views allow
                 the users to examine the EEG signals from both time
                 domain and frequency domain simultaneously. The active
                 learning module first trains a deep neural network that
                 automatically extracts both the local features with
                 respect to each segment itself and the long term
                 dynamics of the EEG signals to classify IIC patterns.
                 Then leveraging the output of the deep learning model,
                 the EEG segments that can best improve the model are
                 selected and prompted to clinicians to label. This
                 process is iterated until the clinicians and the models
                 show high degree of agreement. Our initial experimental
                 results show that our Smile system allows the
                 clinicians to label the EEG segments at will with a
                 response time below 500 ms. The accuracy of the model
                 is progressively improved as more and more high quality
                 labels are acquired over time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Green:2019:UGD,
  author =       "Alastair Green and Paolo Guagliardo and Leonid Libkin
                 and Tobias Lindaaker and Victor Marsault and Stefan
                 Plantikow and Martin Schuster and Petra Selmer and
                 Hannes Voigt",
  title =        "Updating graph databases with {Cypher}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2242--2254",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352139",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The paper describes the present and the future of
                 graph updates in Cypher, the language of the Neo4j
                 property graph database and several other products.
                 Update features include those with clear analogs in
                 relational databases, as well as those that do not
                 correspond to any relational operators. Moreover,
                 unlike SQL, Cypher updates can be arbitrarily
                 intertwined with querying clauses. After presenting the
                 current state of update features, we point out their
                 shortcomings, most notably violations of atomicity and
                 non-deterministic behavior of updates. These have not
                 been previously known in the Cypher community. We then
                 describe the industry-academia collaboration on
                 designing a revised set of Cypher update operations.
                 Based on discovered shortcomings of update features, a
                 number of possible solutions were devised. They were
                 presented to key Cypher users, who were given the
                 opportunity to comment on how update features are used
                 in real life, and on their preferences for proposed
                 fixes. As the result of the consultation, a new set of
                 update operations for Cypher were designed. Those led
                 to a streamlined syntax, and eliminated the unexpected
                 and problematic behavior that original Cypher updates
                 exhibited.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kamsky:2019:ATC,
  author =       "Asya Kamsky",
  title =        "Adapting {TPC-C} benchmark to measure performance of
                 multi-document transactions in {MongoDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2254--2262",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352140",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "MongoDB is a popular distributed database that
                 supports replication, horizontal partitioning
                 (sharding), a flexible document schema and ACID
                 guarantees on the document level. While it is generally
                 grouped with ``NoSQL'' databases, MongoDB provides many
                 features similar to those of traditional RDBMS such as
                 secondary indexes, an ad hoc query language, support
                 for complex aggregations, and new as of version 4.0
                 multi-statement, multi-document ACID transactions. We
                 looked for a well understood OLTP workload benchmark to
                 use in our own system performance test suite to
                 establish a baseline of transaction performance to
                 enable flagging performance regressions, as well as
                 improvements as we continue to add new functionality.
                 While there exist many published and widely used
                 benchmarks for RDBMS OLTP workloads, there are none
                 specifically for document databases. This paper
                 describes the process of adapting an existing
                 traditional RDBMS benchmark to MongoDB query language
                 and transaction semantics to allow measuring
                 transaction performance. We chose to adapt the TPC-C
                 benchmark even though it assumes a relational database
                 schema and SQL, hence extensive changes had to be made
                 to stay consistent with MongoDB best practices. Our
                 goal did not include creating official TPC-C
                 certifiable results, however, every attempt was made to
                 stay consistent with the spirit of the original
                 benchmark specification as well as to be compliant to
                 all specification requirements where possible. We
                 discovered that following best practices for document
                 schema design achieves better performance than using
                 required normalized schema. All the source code used
                 and validation scripts are published in github to allow
                 the reader to recreate and verify our results.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2019:CND,
  author =       "Feifei Li",
  title =        "Cloud-native database systems at {Alibaba}:
                 opportunities and challenges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2263--2272",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352141",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Cloud-native databases become increasingly important
                 for the era of cloud computing, due to the needs for
                 elasticity and on-demand usage by various applications.
                 These challenges from cloud applications present new
                 opportunities for cloud-native databases that cannot be
                 fully addressed by traditional on-premise enterprise
                 database systems. A cloud-native database leverages
                 software-hardware co-design to explore accelerations
                 offered by new hardware such as RDMA, NVM, kernel
                 bypassing protocols such as DPDK. Meanwhile, new design
                 architectures, such as shared storage, enable a
                 cloud-native database to decouple computation from
                 storage and provide excellent elasticity. For highly
                 concurrent workloads that require horizontal
                 scalability, a cloud-native database can leverage a
                 shared-nothing layer to provide distributed query and
                 transaction processing. Applications also require
                 cloud-native databases to offer high availability
                 through distributed consensus protocols. At Alibaba, we
                 have explored a suite of technologies to design
                 cloud-native database systems. Our storage engine,
                 X-Engine and PolarFS, improves both write and read
                 throughputs by using a LSM-tree design and self-adapted
                 separation of hot and cold data records. Based on these
                 efforts, we have designed and implemented POLARDB and
                 its distributed version POLARDB-X, which has
                 successfully supported the extreme transaction
                 workloads during the 2018 Global Shopping Festival on
                 November 11, 2018, and achieved commercial success on
                 Alibaba Cloud. We have also designed an OLAP system
                 called AnalyticDB (ADB in short) for enabling real-time
                 interactive data analytics for big data. We have
                 explored a self-driving database platform to achieve
                 autoscaling and intelligent database management. We
                 will report key technologies and lessons learned to
                 highlight the technical challenges and opportunities
                 for cloud-native database systems at Alibaba.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Boehm:2019:MME,
  author =       "Alexander Boehm",
  title =        "In-memory for the masses: enabling cost-efficient
                 deployments of in-memory data management platforms for
                 business applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2273--2275",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352142",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "With unrivaled performance, modern in-memory data
                 management platforms such as SAP HANA [5] enable the
                 creation of novel types of business applications. By
                 keeping all data in memory, applications may combine
                 both demanding transactional as well as complex
                 analytical workloads in the context of a single system.
                 While this excellent performance, data freshness, and
                 flexibility gain is highly desirable in a vast range of
                 modern business applications [6], the corresponding
                 large appetite for main memory has significant
                 implications on server sizing. Particularly, hardware
                 costs on premise as well as in the cloud are at risk to
                 increase significantly, driven by the high amount of
                 DRAM that needs to be provisioned potentially. In this
                 talk, we discuss a variety of challenges and
                 opportunities that arise when running business
                 applications in a cost-efficient manner on in-memory
                 database systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hubail:2019:CAN,
  author =       "Murtadha {Al Hubail} and Ali Alsuliman and Michael
                 Blow and Michael Carey and Dmitry Lychagin and Ian
                 Maxon and Till Westmann",
  title =        "Couchbase analytics: {NoETL} for scalable {NoSQL} data
                 analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2275--2286",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352143",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Couchbase Server is a highly scalable
                 document-oriented database management system. With a
                 shared-nothing architecture, it exposes a fast
                 key--value store with a managed cache for
                 sub-millisecond data operations, indexing for fast
                 queries, and a powerful query engine for executing
                 declarative SQL-like queries. Its Query Service debuted
                 several years ago and supports high volumes of
                 low-latency queries and updates for JSON documents. Its
                 recently introduced Analytics Service complements the
                 Query Service. Couchbase Analytics, the focus of this
                 paper, supports complex analytical queries (e.g., ad
                 hoc joins and aggregations) over large collections of
                 JSON documents. This paper describes the Analytics
                 Service from the outside in, including its user model,
                 its SQL++ based query language, and its MPP-based
                 storage and query processing architecture. It also
                 briefly touches on the relationship of Couchbase
                 Analytics to Apache AsterixDB, the open source Big Data
                 management system at the core of Couchbase Analytics.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Colyer:2019:PS,
  author =       "Adrian Colyer",
  title =        "Performance in the spotlight",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2287--2289",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352144",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Performance in its various guises features prominently
                 in research evaluations, and rightly so. Without
                 adequate performance a system is not fit for purpose.
                 That doesn't necessarily mean we should pursue
                 performance at all costs though. In this talk we'll
                 explore a variety of additional evaluation criteria,
                 with a focus on those that are most important to
                 practitioners, and ask whether or not considering them
                 can open up interesting avenues of research.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Abouzied:2019:ILS,
  author =       "Azza Abouzied and Daniel J. Abadi and Kamil
                 Bajda-Pawlikowski and Avi Silberschatz",
  title =        "Integration of large-scale data processing systems and
                 traditional parallel database technology",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2290--2299",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352145",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In 2009 we explored the feasibility of building a
                 hybrid SQL data analysis system that takes the best
                 features from two competing technologies: large-scale
                 data processing systems (such as Google MapReduce and
                 Apache Hadoop) and parallel database management systems
                 (such as Greenplum and Vertica). We built a prototype,
                 HadoopDB, and demonstrated that it can deliver the high
                 SQL query performance and efficiency of parallel
                 database management systems while still providing the
                 scalability, fault tolerance, and flexibility of
                 large-scale data processing systems. Subsequently,
                 HadoopDB grew into a commercial product, Hadapt, whose
                 technology was eventually acquired by Teradata. In this
                 paper, we provide an overview of HadoopDB's original
                 design, and its evolution during the subsequent ten
                 years of research and development effort. We describe
                 how the project innovated both in the research lab, and
                 as a commercial product at Hadapt and Teradata. We then
                 discuss the current vibrant ecosystem of software
                 projects (most of which are open source) that continued
                 HadoopDB's legacy of implementing a systems level
                 integration of large-scale data processing systems and
                 parallel database technology.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cooper:2019:PSL,
  author =       "Brian F. Cooper and P. P. S. Narayan and Raghu
                 Ramakrishnan and Utkarsh Srivastava and Adam
                 Silberstein and Philip Bohannon and Hans-Arno Jacobsen
                 and Nick Puz and Daniel Weaver and Ramana Yerneni",
  title =        "{PNUTS} to {Sherpa}: lessons from {Yahoo!}'s cloud
                 database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2300--2307",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352146",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we look back at the evolution of
                 Yahoo!'s geo-replicated cloud data store from a
                 research project called PNUTS to a globally deployed
                 production system called Sherpa, share some of the
                 lessons learned along the way, and finally, compare
                 PNUTS with current operational cloud stores.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Tan:2019:WPD,
  author =       "Wang-Chiew Tan",
  title =        "What {I} probably did right and what {I} think {I}
                 could have done better",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2308--2308",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352147",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "They say a lot of good things in life are not free.
                 Success is one of them. Successful research requires an
                 immense amount of hard work and dedication over a long
                 period of time. For better or worse, hard work alone
                 does not guarantee success. In my experience, success
                 is a marathon of hard work and some luck along the way.
                 What is often forgotten is that it is important to
                 enjoy the journey of hard work and appreciate many
                 experiences and relationships along the way. I am
                 deeply honored to receive the 2019 VLDB Women in
                 Database Research Award. In the talk, I will share with
                 you a retrospective of my journey so far, what I
                 probably did right along the way, and perhaps more
                 importantly, the many things I think I could have done
                 better as a computer scientist and especially a female
                 computer scientist.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Parameswaran:2019:EDS,
  author =       "Aditya Parameswaran",
  title =        "Enabling data science for the majority",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2309--2322",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352148",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Despite great strides in the generation, collection,
                 and processing of data at scale, data science is still
                 extremely inconvenient for the vast majority of the
                 population. The driving goal of our research, over the
                 past half decade, has been to make it easy for
                 individuals and teams---regardless of programming or
                 analysis expertise---to manage, analyze, make sense of,
                 and draw insights from large datasets. In this article,
                 we reflect on a comprehensive suite of tools that we've
                 been building to empower everyone to perform data
                 science more efficiently and effortlessly, including
                 DataSpread, a scalable spreadsheet tool that combines
                 the benefits of spreadsheets and databases, and
                 ZenVisage, a visual exploration tool that accelerates
                 the discovery of trends or patterns. Our tools have
                 been developed in collaboration with experts in various
                 disciplines, including neuroscience, battery science,
                 genomics, astrophysics, and ad analytics. We will
                 discuss some of the key technical challenges underlying
                 the development of these tools, and how we addressed
                 them, drawing from ideas in multiple disciplines. In
                 the process, we will outline a research agenda for tool
                 development to empower everyone to tap into the hidden
                 potential in their datasets at scale.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Rekatsinas:2019:ODM,
  author =       "Theodoros Rekatsinas and Sudeepa Roy and Manasi Vartak
                 and Ce Zhang and Neoklis Polyzotis",
  title =        "Opportunities for data management research in the era
                 of horizontal {AI\slash ML}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "12",
  pages =        "2323--2323",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3352063.3352149",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "AI/ML is becoming a horizontal technology: its
                 application is expanding to more domains, and its
                 integration touches more parts of the technology stack.
                 Given the strong dependence of ML on data, this
                 expansion creates a new space for applying data
                 management techniques. At the same time, the deeper
                 integration of ML in the technology stack provides more
                 touch points where ML can be used in data management
                 systems and vice versa. In this panel, we invite
                 researchers working in this domain to discuss this
                 emerging world and its implications on data-management
                 research. Among other topics, the discussion will touch
                 on the opportunities for interesting research, how we
                 can interact with other communities, what is the core
                 expertise we bring to the table, and how we can conduct
                 and evaluate this research effectively within our own
                 community. The goal of the panel is to nudge the
                 community to appreciate the opportunities in this new
                 world of horizontal AI/ML and to spur a discussion on
                 how we can shape an effective research agenda.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Barthels:2019:SCH,
  author =       "Claude Barthels and Ingo M{\"u}ller and Konstantin
                 Taranov and Gustavo Alonso and Torsten Hoefler",
  title =        "Strong consistency is not hard to get: two-phase
                 locking and two-phase commit on thousands of cores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "13",
  pages =        "2325--2338",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3358701.3358702",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 26 07:21:38 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Concurrency control is a cornerstone of distributed
                 database engines and storage systems. In pursuit of
                 scalability, a common assumption is that Two-Phase
                 Locking (2PL) and Two-Phase Commit (2PC) are not viable
                 solutions due to their communication overhead. Recent
                 results, however, have hinted that 2PL and 2PC might
                 not have such a bad performance. Nevertheless, there
                 has been no attempt to actually measure how a
                 state-of-the-art implementation of 2PL and 2PC would
                 perform on modern hardware. The goal of this paper is
                 to establish a baseline for concurrency control
                 mechanisms on thousands of cores connected through a
                 low-latency network. We develop a distributed lock
                 table supporting all the standard locking modes used in
                 database engines. We focus on strong consistency in the
                 form of strict serializability implemented through
                 strict 2PL, but also explore read-committed and
                 repeatable-read, two common isolation levels used in
                 many systems. We do not leverage any known
                 optimizations in the locking or commit parts of the
                 protocols. The surprising result is that, for TPC-C,
                 2PL and 2PC can be made to scale to thousands of cores
                 and hundreds of machines, reaching a throughput of over
                 21 million transactions per second with 9.5 million New
                 Order operations per second. Since most existing
                 relational database engines use some form of locking
                 for implementing concurrency control, our findings
                 provide a path for such systems to scale without having
                 to significantly redesign transaction management. To
                 achieve these results, our implementation relies on
                 Remote Direct Memory Access (RDMA). Today, this
                 technology is commonly available on both Infiniband as
                 well as Ethernet networks, making the results valid
                 across a wide range of systems and platforms, including
                 database appliances, data centers, and cloud
                 environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wei:2019:DRE,
  author =       "Ziheng Wei and Uwe Leck and Sebastian Link",
  title =        "Discovery and ranking of embedded uniqueness
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "13",
  pages =        "2339--2352",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3358701.3358703",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 26 07:21:38 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data profiling is an enabler for efficient data
                 management and effective analytics. The discovery of
                 data dependencies is at the core of data profiling. We
                 conduct the first study on the discovery of embedded
                 uniqueness constraints (eUCs). These constraints
                 represent unique column combinations embedded in
                 complete fragments of incomplete data. We showcase
                 their implementation as filtered indexes, and their
                 application in integrity management and query
                 optimization. We show that the decision variant of
                 discovering a minimal eUC is NP-complete and
                 W[2]-complete. We characterize the maximum possible
                 solution size, and show which families of eUCs attain
                 that size. Despite the challenges, experiments with
                 real-world and synthetic benchmark data show that our
                 column(row)-efficient algorithms perform well with a
                 large number of columns (rows), and our hybrid algorithm
                 combines ideas from both. We show how to rank eUCs to
                 help identify relevant eUCs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chu:2019:ODB,
  author =       "Lingyang Chu and Yanyan Zhang and Yu Yang and Lanjun
                 Wang and Jian Pei",
  title =        "Online density bursting subgraph detection from
                 temporal graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "13",
  pages =        "2353--2365",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3358701.3358704",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 26 07:21:38 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a temporal weighted graph that consists of a
                 potentially endless stream of updates, we are
                 interested in finding density bursting subgraphs (DBS
                 for short), where a DBS is a subgraph that accumulates
                 its density at the fastest speed. Online DBS detection
                 enjoys many novel applications. At the same time, it is
                 challenging since the time duration of a DBS can be
                 arbitrarily long but a limited size storage can buffer
                 only up to a certain number of updates. To tackle this
                 problem, we observe the critical decomposability of
                 DBSs and show that a DBS with a long time duration can
                 be decomposed into a set of indecomposable DBSs with
                 equal or larger burstiness. We further prove that the
                 time duration of an indecomposable DBS is upper bounded
                 and propose an efficient method TopkDBSOL to detect
                 indecomposable DBSs in an online manner. Extensive
                 experiments demonstrate the effectiveness, efficiency
                 and scalability of TopkDBSOL in detecting significant
                 DBSs from temporal graphs in real applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Holanda:2019:PII,
  author =       "Pedro Holanda and Mark Raasveldt and Stefan Manegold
                 and Hannes M{\"u}hleisen",
  title =        "Progressive indexes: indexing for interactive data
                 analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "13",
  pages =        "2366--2378",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3358701.3358705",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 26 07:21:38 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Interactive exploration of large volumes of data is
                 increasingly common, as data scientists attempt to
                 extract interesting information from large opaque data
                 sets. This scenario presents a difficult challenge for
                 traditional database systems, as (1) nothing is known
                 about the query workload in advance, (2) the query
                 workload is constantly changing, and (3) the system
                 must provide interactive responses to the issued
                 queries. This environment is challenging for index
                 creation, as traditional database indexes require
                 upfront creation, hence a priori workload knowledge, to
                 be efficient. In this paper, we introduce Progressive
                 Indexing, a novel performance-driven indexing technique
                 that focuses on automatic index creation while
                 providing interactive response times to incoming
                 queries. Its design allows queries to have a limited
                 budget to spend on index creation. The indexing budget
                 is automatically tuned to each query before query
                 processing. This allows for systems to provide
                 interactive answers to queries during index creation
                 while being robust against various workload patterns
                 and data distributions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Hanai:2019:DEP,
  author =       "Masatoshi Hanai and Toyotaro Suzumura and Wen Jun Tan
                 and Elvis Liu and Georgios Theodoropoulos and Wentong
                 Cai",
  title =        "Distributed edge partitioning for trillion-edge
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "13",
  pages =        "2379--2392",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3358701.3358706",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 26 07:21:38 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We propose Distributed Neighbor Expansion (Distributed
                 NE), a parallel and distributed graph partitioning
                 method that can scale to trillion-edge graphs while
                 providing high partitioning quality. Distributed NE is
                 based on a new heuristic, called parallel expansion,
                 where each partition is constructed in parallel by
                 greedily expanding its edge set from a single vertex in
                 such a way that the increase of the vertex cuts becomes
                 local minimal. We theoretically prove that the proposed
                 method has the upper bound in the partitioning quality.
                 The empirical evaluation with various graphs shows that
                 the proposed method produces higher-quality partitions
                 than the state-of-the-art distributed graph
                 partitioning algorithms. The performance evaluation
                 shows that the space efficiency of the proposed method
                 is an order-of-magnitude better than the existing
                 algorithms, keeping its time efficiency comparable. As
                 a result, Distributed NE can partition a trillion-edge
                 graph using only 256 machines within 70 minutes.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Athanassoulis:2019:OCL,
  author =       "Manos Athanassoulis and Kenneth S. B{\o}gh and Stratos
                 Idreos",
  title =        "Optimal column layout for hybrid workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "13",
  pages =        "2393--2407",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3358701.3358707",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 26 07:21:38 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data-intensive analytical applications need to support
                 both efficient reads and writes. However, what is
                 usually a good data layout for an update-heavy
                 workload, is not well-suited for a read-mostly one and
                 vice versa. Modern analytical data systems rely on
                 columnar layouts and employ delta stores to inject new
                 data and updates. We show that for hybrid workloads we
                 can achieve close to one order of magnitude better
                 performance by tailoring the column layout design to
                 the data and query workload. Our approach navigates the
                 possible design space of the physical layout: it
                 organizes each column's data by determining the number
                 of partitions, their corresponding sizes and ranges,
                 and the amount of buffer space and how it is allocated.
                 We frame these design decisions as an optimization
                 problem that, given workload knowledge and performance
                 requirements, provides an optimal physical layout for
                 the workload at hand. To evaluate this work, we build
                 an in-memory storage engine, Casper, and we show that
                 it outperforms state-of-the-art data layouts of
                 analytical systems for hybrid workloads. Casper
                 delivers up to 2.32x higher throughput for
                 update-intensive workloads and up to 2.14x higher
                 throughput for hybrid workloads. We further show how to
                 make data layout decisions robust to workload variation
                 by carefully selecting the input of the optimization.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sintos:2019:SDC,
  author =       "Stavros Sintos and Pankaj K. Agarwal and Jun Yang",
  title =        "Selecting data to clean for fact checking: minimizing
                 uncertainty vs. maximizing surprise",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "12",
  number =       "13",
  pages =        "2408--2421",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3358701.3358708",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Nov 26 07:21:38 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the optimization problem of selecting
                 numerical quantities to clean in order to fact-check
                 claims based on such data. Oftentimes, such claims are
                 technically correct, but they can still mislead for two
                 reasons. First, data may contain uncertainty and
                 errors. Second, data can be ``fished'' to advance
                 particular positions. In practice, fact-checkers cannot
                 afford to clean all data and must choose to clean what
                 ``matters the most'' to checking a claim. We explore
                 alternative definitions of what ``matters the most'':
                 one is to ascertain claim qualities (by minimizing
                 uncertainty in these measures), while an alternative is
                 just to counter the claim (by maximizing the
                 probability of finding a counterargument). We show
                 whether the two objectives align with each other, with
                 important implications on when fact-checkers should
                 exercise care in selective data cleaning, to avoid
                 potential bias introduced by their desire to counter
                 claims. We develop efficient algorithms for solving the
                 various variants of the optimization problem, showing
                 significant improvements over naive solutions. The
                 problem is particularly challenging because the
                 objectives in the fact-checking context are complex,
                 non-linear functions over data. We obtain results that
                 generalize to a large class of functions, with
                 potential applications beyond fact-checking.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Chawla:2019:RMQ,
  author =       "Shuchi Chawla and Shaleen Deep and Paraschos Koutris
                 and Yifeng Teng",
  title =        "Revenue maximization for query pricing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "1",
  pages =        "1--14",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3357377.3357378",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:03 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Buying and selling of data online has increased
                 substantially over the last few years. Several
                 frameworks have already been proposed that study query
                 pricing in theory and practice. The key guiding
                 principle in these works is the notion of
                 arbitrage-freeness where the broker can set different
                 prices for different queries made to the dataset, but
                 must ensure that the pricing function does not provide
                 the buyers with opportunities for arbitrage. However,
                 little is known about revenue maximization aspect of
                 query pricing. In this paper, we study the problem
                 faced by a broker selling access to data with the goal
                 of maximizing her revenue. We show that this problem
                 can be formulated as a revenue maximization problem
                 with single-minded buyers and unlimited supply, for
                 which several approximation algorithms are known. We
                 perform an extensive empirical evaluation of the
                 performance of several pricing algorithms for the query
                 pricing problem on real-world instances. In addition to
                 previously known approximation algorithms, we propose
                 several new heuristics and analyze them both
                 theoretically and experimentally. Our experiments show
                 that algorithms with the best theoretical bounds are
                 not necessarily the best empirically. We identify
                 algorithms and heuristics that are both fast and also
                 provide consistently good performance when valuations
                 are drawn from a wide variety of distributions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shi:2019:RTP,
  author =       "Jieming Shi and Renchi Yang and Tianyuan Jin and
                 Xiaokui Xiao and Yin Yang",
  title =        "Realtime top-$k$ {Personalized PageRank} over large
                 graphs on {GPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "1",
  pages =        "15--28",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3357377.3357379",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:03 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a graph G, a source node $s \in G$ and a positive
                 integer k, a top- k Personalized PageRank (PPR) query
                 returns the k nodes with the highest PPR values with
                 respect to s, where the PPR of a node v measures its
                 relevance from the perspective of source s. Top- k PPR
                 processing is a fundamental task in many important
                 applications such as web search, social networks, and
                 graph analytics. This paper aims to answer such a query
                 in realtime, i.e., within less than 100ms, on an
                 Internet-scale graph with billions of edges. This is
                 far beyond the current state of the art, due to the
                 immense computational cost of processing a PPR query.
                 We achieve this goal with a novel algorithm kPAR, which
                 utilizes the massive parallel processing power of GPUs.
                 The main challenge in designing a GPU-based PPR
                 algorithm lies in that a GPU is mainly a parallel
                 computation device, whereas PPR processing involves
                 graph traversals and value propagation operations,
                 which are inherently sequential and memory-bound.
                 Existing scalable PPR algorithms are mostly described
                 as single-thread CPU solutions that are resistant to
                 parallelization. Further, they usually involve complex
                 data structures which do not have efficient adaptations
                 on GPUs. kPAR overcomes these problems via both novel
                 algorithmic designs (namely, adaptive forward push and
                 inverted random walks ) and system engineering (e.g.,
                 load balancing) to realize the potential of GPUs.
                 Meanwhile, kPAR provides rigorous guarantees on both
                 result quality and worst-case efficiency. Extensive
                 experiments show that kPAR is usually 10x faster than
                 parallel adaptations of existing methods. Notably, on a
                 billion-edge Twitter graph, kPAR answers a top-1000 PPR
                 query in 42.4 milliseconds.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2019:FLS,
  author =       "Sheng Wang and Zhifeng Bao and J. Shane Culpepper and
                 Timos Sellis and Xiaolin Qin",
  title =        "Fast large-scale trajectory clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "1",
  pages =        "29--42",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3357377.3357380",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:03 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In this paper, we study the problem of large-scale
                 trajectory data clustering, k -paths, which aims to
                 efficiently identify k ``representative'' paths in a
                 road network. Unlike traditional clustering approaches
                 that require multiple data-dependent hyperparameters, k
                 -paths can be used for visual exploration in
                 applications such as traffic monitoring, public transit
                 planning, and site selection. By combining map matching
                 with an efficient intermediate representation of
                 trajectories and a novel edge-based distance (EBD)
                 measure, we present a scalable clustering method to
                 solve k -paths. Experiments verify that we can cluster
                 millions of taxi trajectories in less than one minute,
                 achieving improvements of up to two orders of magnitude
                 over state-of-the-art solutions that solve similar
                 trajectory clustering problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Herodotou:2019:ADT,
  author =       "Herodotos Herodotou and Elena Kakoulli",
  title =        "Automating distributed tiered storage management in
                 cluster computing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "1",
  pages =        "43--56",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3357377.3357381",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:03 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data-intensive platforms such as Hadoop and Spark are
                 routinely used to process massive amounts of data
                 residing on distributed file systems like HDFS.
                 Increasing memory sizes and new hardware technologies
                 (e.g., NVRAM, SSDs) have recently led to the
                 introduction of storage tiering in such settings.
                 However, users are now burdened with the additional
                 complexity of managing the multiple storage tiers and
                 the data residing on them while trying to optimize
                 their workloads. In this paper, we develop a general
                 framework for automatically moving data across the
                 available storage tiers in distributed file systems.
                 Moreover, we employ machine learning for tracking and
                 predicting file access patterns, which we use to decide
                 when and which data to move up or down the storage
                 tiers for increasing system performance. Our approach
                 uses incremental learning to dynamically refine the
                 models with new file accesses, allowing them to
                 naturally adjust and adapt to workload changes over
                 time. Our extensive evaluation using realistic
                 workloads derived from Facebook and CMU traces compares
                 our approach with several other policies and showcases
                 significant benefits in terms of both workload
                 performance and cluster efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Jung:2019:AAD,
  author =       "Jinho Jung and Hong Hu and Joy Arulraj and Taesoo Kim
                 and Woonhak Kang",
  title =        "{APOLLO}: automatic detection and diagnosis of
                 performance regressions in database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "1",
  pages =        "57--70",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3357377.3357382",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:03 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The practical art of constructing database management
                 systems (DBMSs) involves a morass of trade-offs among
                 query execution speed, query optimization speed,
                 standards compliance, feature parity, modularity,
                 portability, and other goals. It is no surprise that
                 DBMSs, like all complex software systems, contain bugs
                 that can adversely affect their performance. The
                 performance of DBMSs is an important metric as it
                 determines how quickly an application can take in new
                 information and use it to make new decisions. Both
                 developers and users face challenges while dealing with
                 performance regression bugs. First, developers usually
                 find it challenging to manually design test cases to
                 uncover performance regressions since DBMS components
                 tend to have complex interactions. Second, users
                 encountering performance regressions are often unable
                 to report them, as the regression-triggering queries
                 could be complex and database-dependent. Third,
                 developers have to expend a lot of effort on localizing
                 the root cause of the reported bugs, due to the system
                 complexity and software development complexity. Given
                 these challenges, this paper presents the design of
                 Apollo, a toolchain for automatically detecting,
                 reporting, and diagnosing performance regressions in
                 DBMSs. We demonstrate that Apollo automates the
                 generation of regression-triggering queries, simplifies
                 the bug reporting process for users, and enables
                 developers to quickly pinpoint the root cause of
                 performance regressions. By automating the detection
                 and diagnosis of performance regressions, Apollo
                 reduces the labor cost of developing efficient DBMSs.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Owaida:2019:LLD,
  author =       "Muhsen Owaida and Gustavo Alonso and Laura Fogliarini
                 and Anthony Hock-Koon and Pierre-Etienne Melet",
  title =        "Lowering the latency of data processing pipelines
                 through {FPGA} based hardware acceleration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "1",
  pages =        "71--85",
  month =        sep,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3357377.3357383",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 2 06:49:03 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Web search engines often involve a complex pipeline of
                 processing stages including computing, scoring, and
                 ranking potential answers plus returning the sorted
                 results. The latency of such pipelines can be improved
                 by minimizing data movement, making stages faster, and
                 merging stages. The throughput is determined by the
                 stage with the smallest capacity and it can be improved
                 by allocating enough parallel resources to each stage.
                 In this paper we explore the possibility of employing
                 hardware acceleration (an FPGA) as a way to improve the
                 overall performance when computing answers to search
                 queries. With a real use case as a baseline and
                 motivation, we focus on accelerating the scoring
                 function implemented as a decision tree ensemble, a
                 common approach to scoring and classification in search
                 systems. Our solution uses a novel decision tree
                 ensemble implementation on an FPGA to: (1) increase the
                 number of entries that can be scored per unit of time,
                 and (2) provide a compact implementation that can be
                 combined with previous stages. The resulting system,
                 tested in Amazon F1 instances, significantly improves
                 the quality of the search results and improves
                 performance by two orders of magnitude over the
                 existing CPU based solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Cai:2019:MSS,
  author =       "Shaofeng Cai and Gang Chen and Beng Chin Ooi and
                 Jinyang Gao",
  title =        "Model slicing for supporting complex analytics with
                 elastic inference cost and resource constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "86--99",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364325",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Deep learning models have been used to support
                 analytics beyond simple aggregation, where deeper and
                 wider models have been shown to yield great results.
                 These models consume a huge amount of memory and
                 computational operations. However, most of the
                 large-scale industrial applications are often
                 computational budget constrained. In practice, the peak
                 workload of inference service could be 10x higher than
                 the average cases, with the presence of unpredictable
                 extreme cases. Lots of computational resources could be
                 wasted during off-peak hours and the system may crash
                 when the workload exceeds system capacity. How to
                 support deep learning services with dynamic workload
                 cost-efficiently remains a challenging problem. In this
                 paper, we address the challenge with a general and
                 novel training scheme called model slicing, which
                 enables deep learning models to provide predictions
                 within the prescribed computational resource budget
                 dynamically. Model slicing could be viewed as an
                 elastic computation solution without requiring more
                 computational resources. Succinctly, each layer in the
                 model is divided into groups of contiguous block of
                 basic components (i.e. neurons in dense layers and
                 channels in convolutional layers), and then partially
                 ordered relation is introduced to these groups by
                 enforcing that groups participated in each forward pass
                 always starts from the first group to the
                 dynamically-determined rightmost group. Trained by
                 dynamically indexing the rightmost group with a single
                 parameter slice rate, the network is engendered to
                 build up group-wise and residual representation. Then
                 during inference, a sub-model with fewer groups can be
                 readily deployed for efficiency whose computation is
                 roughly quadratic to the width controlled by the slice
                 rate. Extensive experiments show that models trained
                 with model slicing can effectively support on-demand
                 workload with elastic inference cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Herlihy:2019:CCD,
  author =       "Maurice Herlihy and Barbara Liskov and Liuba Shrira",
  title =        "Cross-chain deals and adversarial commerce",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "100--113",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364326",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern distributed data management systems face a new
                 challenge: how can autonomous, mutually-distrusting
                 parties cooperate safely and effectively? Addressing
                 this challenge brings up questions familiar from
                 classical distributed systems: how to combine multiple
                 steps into a single atomic action, how to recover from
                 failures, and how to synchronize concurrent access to
                 data. Nevertheless, each of these issues requires
                 rethinking when participants are autonomous and
                 potentially adversarial. We propose the notion of a
                 cross-chain deal, a new way to structure complex
                 distributed computations that manage assets in an
                 adversarial setting. Deals are inspired by classical
                 atomic transactions, but are necessarily different, in
                 important ways, to accommodate the decentralized and
                 untrusting nature of the exchange. We describe novel
                 safety and liveness properties, along with two
                 alternative protocols for implementing cross-chain
                 deals in a system of independent blockchain ledgers.
                 One protocol, based on synchronous communication, is
                 fully decentralized, while the other, based on
                 semi-synchronous communication, requires a globally
                 shared ledger.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zois:2019:EMM,
  author =       "Vasileios Zois and Vassilis J. Tsotras and Walid A.
                 Najjar",
  title =        "Efficient main-memory top-$k$ selection for multicore
                 architectures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "114--127",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364327",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Efficient Top-$k$ query evaluation relies on practices
                 that utilize auxiliary data structures to enable early
                 termination. Such techniques were designed to trade-off
                 complex work in the buffer pool against costly access
                 to disk-resident data. Parallel in-memory Top-$k$
                 selection with support for early termination presents a
                 novel challenge because computation shifts higher up in
                 the memory hierarchy. In this environment, data scan
                 methods using SIMD instructions and multithreading
                 perform well despite requiring evaluation of the
                 complete dataset. Early termination schemes that favor
                 simplicity require random access to resolve score
                 ambiguity while those optimized for sequential access
                 incur too many object evaluations. In this work, we
                 introduce the concept of rank uncertainty, a measure of
                 work efficiency that enables classifying existing
                 solutions according to their potential for efficient
                  parallel in-memory Top-$k$ selection. We identify data
                 reordering and layering strategies as those having the
                 highest potential and provide practical guidelines on
                 how to adapt them for parallel in-memory execution
                 (creating the VTA and SLA approaches). In addition, we
                 show that the number of object evaluations can be
                 further decreased by combining data reordering with
                 angle space partitioning (introducing PTA). Our
                 extensive experimental evaluation on varying query
                 parameters using both synthetic and real data, showcase
                 that PTA exhibits between 2 and 4 orders of magnitude
                 better query latency, and throughput when compared to
                 prior work and our optimized algorithmic variants (i.e.
                 VTA, SLA).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Bottcher:2019:SGC,
  author =       "Jan B{\"o}ttcher and Viktor Leis and Thomas Neumann
                 and Alfons Kemper",
  title =        "Scalable garbage collection for in-memory {MVCC}
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "128--141",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364328",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "To support Hybrid Transaction and Analytical
                 Processing (HTAP), database systems generally rely on
                 Multi-Version Concurrency Control (MVCC). While MVCC
                 elegantly enables lightweight isolation of readers and
                 writers, it also generates outdated tuple versions,
                 which, eventually, have to be reclaimed. Surprisingly,
                 we have found that in HTAP workloads, this reclamation
                 of old versions, i.e., garbage collection, often
                 becomes the performance bottleneck. It turns out that
                 in the presence of long-running queries,
                 state-of-the-art garbage collectors are too
                 coarse-grained. As a consequence, the number of
                 versions grows quickly slowing down the entire system.
                 Moreover, the standard background cleaning approach
                 makes the system vulnerable to sudden spikes in
                 workloads. In this work, we propose a novel garbage
                 collection (GC) approach that prunes obsolete versions
                 eagerly. Its seamless integration into the transaction
                 processing keeps the GC overhead minimal and ensures
                 good scalability. We show that our approach handles
                 mixed workloads well and also speeds up pure OLTP
                 workloads like TPC-C compared to existing
                 state-of-the-art approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2019:FDD,
  author =       "Bohua Yang and Dong Wen and Lu Qin and Ying Zhang and
                 Xubo Wang and Xuemin Lin",
  title =        "Fully dynamic depth-first search in directed graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "142--154",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364329",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Depth-first search (DFS) is a fundamental and
                 important algorithm in graph analysis. It is the basis
                 of many graph algorithms such as computing strongly
                 connected components, testing planarity, and detecting
                 biconnected components. The result of a DFS is normally
                 shown as a DFS-Tree. Given the frequent updates in many
                 real-world graphs (e.g., social networks and
                 communication networks), we study the problem of
                 DFS-Tree maintenance in dynamic directed graphs. In the
                 literature, most works focus on the DFS-Tree
                 maintenance problem in undirected graphs and directed
                 acyclic graphs. However, their methods cannot easily be
                 applied in the case of general directed graphs.
                 Motivated by this, we propose a framework and
                 corresponding algorithms for both edge insertion and
                 deletion in general directed graphs. We further give
                 several optimizations to speed up the algorithms. We
                 conduct extensive experiments on 12 real-world datasets
                 to show the efficiency of our proposed algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ma:2019:LMC,
  author =       "Chenhao Ma and Reynold Cheng and Laks V. S. Lakshmanan
                 and Tobias Grubenmann and Yixiang Fang and Xiaodong
                 Li",
  title =        "{LINC}: a motif counting algorithm for uncertain
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "155--168",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364330",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In graph applications (e.g., biological and social
                 networks), various analytics tasks (e.g., clustering
                 and community search) are carried out to extract
                 insight from large and complex graphs. Central to these
                 tasks is the counting of the number of motifs, which
                 are graphs with a few nodes. Recently, researchers have
                 developed several fast motif counting algorithms. Most
                 of these solutions assume that graphs are
                 deterministic, i.e., the graph edges are certain to
                 exist. However, due to measurement and statistical
                 prediction errors, this assumption may not hold, and
                 hence the analysis quality can be affected. To address
                 this issue, we examine how to count motifs on uncertain
                 graphs, whose edges only exist probabilistically.
                 Particularly, we propose a solution framework that can
                 be used by existing deterministic motif counting
                 algorithms. We further propose an approximation
                 algorithm. Extensive experiments on real datasets show
                 that our algorithms are more effective and efficient
                 than existing solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Eskandarian:2019:OOQ,
  author =       "Saba Eskandarian and Matei Zaharia",
  title =        "{ObliDB}: oblivious query processing for secure
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "169--183",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364331",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Hardware enclaves such as Intel SGX are a promising
                 technology for improving the security of databases
                 outsourced to the cloud. These enclaves provide an
                  execution environment isolated from the hypervisor/OS,
                 and encrypt data in RAM. However, for applications that
                 use large amounts of memory, including most databases,
                 enclaves do not protect against access pattern leaks,
                 which let attackers gain a large amount of information
                 about the data. Moreover, the na{\"\i}ve way to address
                 this issue, using Oblivious RAM (ORAM) primitives from
                 the security literature, adds substantial overhead. A
                 number of recent works explore trusted hardware
                 enclaves as a path toward secure, access-pattern
                 oblivious outsourcing of data storage and analysis.
                 While these works efficiently solve specific
                 subproblems (e.g. building secure indexes or running
                 analytics queries that always scan entire tables), no
                 prior work has supported oblivious query processing for
                 general query workloads on a DBMS engine with multiple
                 access methods. Moreover, applying these techniques
                 individually does not guarantee that an end-to-end
                 workload, such as a complex SQL query over multiple
                 tables, will be oblivious. In this paper, we introduce
                 ObliDB, an oblivious database engine design that is the
                 first system to provide obliviousness for general
                 database read workloads over multiple access methods.
                 ObliDB introduces a diverse array of new oblivious
                 physical operators to accelerate oblivious SQL queries,
                 giving speedups of up to an order of magnitude over
                 na{\"\i}ve ORAM. It supports a broad range of queries,
                 including aggregation, joins, insertions, deletions and
                 point queries. We implement ObliDB and show that, on
                 analytics workloads, ObliDB ranges from 1.1--19x faster
                 than Opaque, a previous oblivious, enclave-based system
                  designed only for analytics, and comes within $ 2.6 \times $ of
                 Spark SQL, which provides no security guarantees. In
                 addition, ObliDB supports point queries with 3--10ms
                 latency, which is comparable to index-only trusted
                 hardware systems, and runs over 7x faster than HIRB, a
                 previous encryption-based oblivious index system that
                 supports point queries.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ge:2019:SMP,
  author =       "Chang Ge and Ihab F. Ilyas and Florian Kerschbaum",
  title =        "Secure multi-party functional dependency discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "184--196",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364332",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data profiling is an important task to understand data
                 semantics and is an essential pre-processing step in
                 many tools. Due to privacy constraints, data is often
                 partitioned into silos, with different access control.
                 Discovering functional dependencies (FDs) usually
                 requires access to all data partitions to find
                 constraints that hold on the whole dataset. Simply
                 applying general secure multi-party computation
                 protocols incurs high computation and communication
                 cost. This paper formulates the FD discovery problem in
                 the secure multi-party scenario. We propose secure
                 constructions for validating candidate FDs, and present
                 efficient cryptographic protocols to discover FDs over
                 distributed partitions. Experimental results show that
                  our solution is practically efficient over non-secure
                 distributed FD discovery, and can significantly
                 outperform general purpose multi-party computation
                 frameworks. To the best of our knowledge, our work is
                 the first one to tackle this problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Wang:2019:VFM,
  author =       "Minmei Wang and Mingxun Zhou and Shouqian Shi and Chen
                 Qian",
  title =        "Vacuum filters: more space-efficient and faster
                 replacement for {Bloom} and cuckoo filters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "197--210",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364333",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We present vacuum filters, a type of data structures
                 to support approximate membership queries. Vacuum
                 filters cost the smallest space among all known AMQ
                 data structures and provide higher insertion and lookup
                 throughput in most situations. Hence they can be used
                 as the replacement of the widely used Bloom filters and
                 cuckoo filters. Similar to cuckoo filters, vacuum
                 filters also store item fingerprints in a table. The
                 memory-efficiency and throughput improvements are from
                 the innovation of a table insertion and fingerprint
                 eviction strategy that achieves both high load factor
                 and data locality without any restriction of the table
                 size. In addition, we propose a new update framework to
                 resolve two difficult problems for AMQ structures under
                 dynamics, namely duplicate insertions and set resizing.
                 The experiments show that vacuum filters can achieve
                  25\% less space on average and similar throughput
                 compared to cuckoo filters, and 15\% less space and $ >
                 10 \times $ throughput compared to Bloom filters, with
                 same false positive rates. AMQ data structures are
                 widely used in various layers of computer systems and
                 networks and are usually hosted in platforms where
                 memory is limited and precious. Hence the improvements
                 brought by vacuum filters can be considered
                 significant.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2019:SES,
  author =       "Yihan Sun and Guy E. Blelloch and Wan Shen Lim and
                 Andrew Pavlo",
  title =        "On supporting efficient snapshot isolation for hybrid
                 workloads with multi-versioned indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "2",
  pages =        "211--225",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3364324.3364334",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:12 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Modern data-driven applications require that databases
                 support fast analytical queries while undergoing rapid
                 updates---often referred to as Hybrid Transactional
                 Analytical Processing (HTAP). Achieving fast queries
                 and updates in a database management system (DBMS) is
                 challenging since optimizations to improve analytical
                 queries can cause overhead for updates. One solution is
                 to use snapshot isolation (SI) for multi-version
                 concurrency control (MVCC) to allow readers to make
                 progress regardless of concurrent writers. In this
                 paper, we propose the Parallel Binary Tree (P-Tree)
                 index structure to achieve SI and MVCC for multicore
                 in-memory HTAP DBMSs. At their core, P-Trees are based
                 on pure (immutable) data structures that use
                 path-copying for updates for fast multi-versioning.
                 They support tree nesting to improve OLAP performance
                 while still allowing for efficient updates. The data
                 structure also enables parallel algorithms for bulk
                 operations on indexes and their underlying tables. We
                 evaluate P-Trees on OLTP and OLAP benchmarks, and
                 compare them with state-of-the-art data structures and
                 DBMSs. Our experiments show that P-Trees outperform
                 many concurrent data structures for the YCSB workload,
                  and are 4--9$ \times $ faster than existing DBMSs for analytical
                 queries, while also achieving reasonable throughput for
                 simultaneous transactional updates.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Fang:2019:IMV,
  author =       "Zhuhe Fang and Beilei Zheng and Chuliang Weng",
  title =        "Interleaved multi-vectorizing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "226--238",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368290",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "SIMD is an instruction set in mainstream processors,
                 which provides the data level parallelism to accelerate
                 the performance of applications. However, its
                 advantages diminish when applications suffer from heavy
                 cache misses. To eliminate cache misses in SIMD
                 vectorization, we present interleaved multi-vectorizing
                 (IMV) in this paper. It interleaves multiple execution
                 instances of vectorized code to hide memory access
                 latency with more computation. We also propose residual
                 vectorized states to solve the control flow divergence
                 in vectorization. IMV can make full use of the data
                 parallelism in SIMD and the memory level parallelism
                 through prefetching. It reduces cache misses, branch
                 misses and computation overhead to significantly speed
                 up the performance of pointer-chasing applications, and
                 it can be applied to executing entire query pipelines.
                  As experimental results show, IMV achieves up to $ 4.23
                  \times $ and $ 3.17 \times $ better performance compared with the pure
                 scalar implementation and the pure SIMD vectorization,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Shetiya:2019:UOA,
  author =       "Suraj Shetiya and Abolfazl Asudeh and Sadia Ahmed and
                 Gautam Das",
  title =        "A unified optimization algorithm for solving
                 {``regret-minimizing representative''} problems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "239--251",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368291",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Given a database with numeric attributes, it is often
                 of interest to rank the tuples according to linear
                 scoring functions. For a scoring function and a subset
                 of tuples, the regret of the subset is defined as the
                 (relative) difference in scores between the top-1 tuple
                 of the subset and the top-1 tuple of the entire
                 database. Finding the regret-ratio minimizing set
                 (RRMS), i.e., the subset of a required size k that
                 minimizes the maximum regret-ratio across all possible
                 ranking functions, has been a well-studied problem in
                 recent years. This problem is known to be NP-complete
                 and there are several approximation algorithms for it.
                 Other NP-complete variants have also been investigated,
                 e.g., finding the set of size k that minimizes the
                 average regret ratio over all linear functions. Prior
                 work have designed customized algorithms for different
                 variants of the problem, and are unlikely to easily
                 generalize to other variants. In this paper we take a
                 different path towards tackling these problems. In
                 contrast to the prior, we propose a unified algorithm
                 for solving different problem variants. Unification is
                 done by localizing the customization to the design of
                 variant-specific subroutines or ``oracles'' that are
                 called by our algorithm. Our unified algorithm takes
                 inspiration from the seemingly unrelated problem of
                 clustering from data mining, and the corresponding
                 k-medoid algorithm. We make several innovative
                 contributions in designing our algorithm, including
                 various techniques such as linear programming, edge
                 sampling in graphs, volume estimation of
                 multi-dimensional convex polytopes, and several others.
                 We provide rigorous theoretical analysis, as well as
                 substantial experimental evaluations over real and
                 synthetic data sets to demonstrate the practical
                 feasibility of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kandula:2019:PDI,
  author =       "Srikanth Kandula and Laurel Orr and Surajit
                 Chaudhuri",
  title =        "Pushing data-induced predicates through joins in
                 big-data clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "252--265",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368292",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Using data statistics, we convert predicates on a
                 table into data induced predicates (diPs) that apply on
                 the joining tables. Doing so substantially speeds up
                 multi-relation queries because the benefits of
                 predicate pushdown can now apply beyond just the tables
                 that have predicates. We use diPs to skip data
                 exclusively during query optimization; i.e., diPs lead
                 to better plans and have no overhead during query
                 execution. We study how to apply diPs for complex query
                 expressions and how the usefulness of diPs varies with
                 the data statistics used to construct diPs and the data
                 distributions. Our results show that building diPs
                 using zone-maps which are already maintained in today's
                 clusters leads to sizable data skipping gains. Using a
                 new (slightly larger) statistic, 50\% of the queries in
                 the TPC-H, TPC-DS and JoinOrder benchmarks can skip at
                 least 33\% of the query input. Consequently, the median
                 query in a production big-data cluster finishes roughly
                 2x faster.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Pena:2019:DAE,
  author =       "Eduardo H. M. Pena and Eduardo C. de Almeida and Felix
                 Naumann",
  title =        "Discovery of approximate (and exact) denial
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "266--278",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368293",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Maintaining data consistency is known to be hard.
                 Recent approaches have relied on integrity constraints
                 to deal with the problem --- correct and complete
                 constraints naturally work towards data consistency.
                 State-of-the-art data cleaning frameworks have used the
                 formalism known as denial constraint (DC) to handle a
                 wide range of real-world constraints. Each DC expresses
                 a relationship between predicates that indicate which
                 combinations of attribute values are inconsistent. The
                 design of DCs, however, must keep pace with the
                 complexity of data and applications. The alternative to
                 designing DCs by hand is automatically discovering DCs
                 from data, which is computationally expensive due to
                 the large search space of DCs. To tackle this
                 challenging task, we present a novel algorithm to
                 efficiently discover DCs: DCFinder. The algorithm
                 combines data structures called position list indexes
                 with techniques based on predicate selectivity to
                 efficiently validate DC candidates. Because the
                 available data often contain errors, DCFinder is
                  especially designed to discover approximate DCs,
                 i.e., DCs that may partially hold. Our experimental
                 evaluation uses real and synthetic datasets and shows
                 that DCFinder outperforms all the existing approximate
                 DC discovery algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Yang:2019:DUC,
  author =       "Zongheng Yang and Eric Liang and Amog Kamsetty and
                 Chenggang Wu and Yan Duan and Xi Chen and Pieter Abbeel
                 and Joseph M. Hellerstein and Sanjay Krishnan and Ion
                 Stoica",
  title =        "Deep unsupervised cardinality estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "279--292",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368294",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Cardinality estimation has long been grounded in
                 statistical tools for density estimation. To capture
                 the rich multivariate distributions of relational
                 tables, we propose the use of a new type of
                 high-capacity statistical model: deep autoregressive
                 models. However, direct application of these models
                 leads to a limited estimator that is prohibitively
                 expensive to evaluate for range or wildcard predicates.
                 To produce a truly usable estimator, we develop a Monte
                 Carlo integration scheme on top of autoregressive
                 models that can efficiently handle range queries with
                 dozens of dimensions or more. Like classical synopses,
                 our estimator summarizes the data without supervision.
                 Unlike previous solutions, we approximate the joint
                 data distribution without any independence assumptions.
                 Evaluated on real-world datasets and compared against
                 real systems and dominant families of techniques, our
                 estimator achieves single-digit multiplicative error at
                 tail, an up to $ 90 \times $ accuracy improvement over
                 the second best method, and is space- and
                 runtime-efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Ding:2019:FGI,
  author =       "Zeyu Ding and Yuxin Wang and Danfeng Zhang and Daniel
                 Kifer",
  title =        "Free gap information from the differentially private
                 sparse vector and noisy max mechanisms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "293--306",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368295",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Noisy Max and Sparse Vector are selection algorithms
                 for differential privacy and serve as building blocks
                 for more complex algorithms. In this paper we show that
                 both algorithms can release additional information for
                 free (i.e., at no additional privacy cost). Noisy Max
                 is used to return the approximate maximizer among a set
                 of queries. We show that it can also release for free
                 the noisy gap between the approximate maximizer and
                 runner-up. This free information can improve the
                 accuracy of certain subsequent counting queries by up
                 to 50\%. Sparse Vector is used to return a set of
                 queries that are approximately larger than a fixed
                 threshold. We show that it can adaptively control its
                 privacy budget (use less budget for queries that are
                 likely to be much larger than the threshold) in order
                 to increase the amount of queries it can process. These
                 results follow from a careful privacy analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Sun:2019:EEL,
  author =       "Ji Sun and Guoliang Li",
  title =        "An end-to-end learning-based cost estimator",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "307--319",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368296",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Cost and cardinality estimation is vital to query
                 optimizer, which can guide the query plan selection.
                 However, traditional empirical cost and cardinality
                 estimation techniques cannot provide high-quality
                 estimation, because they may not effectively capture
                 the correlation between multiple tables. Recently the
                 database community shows that the learning-based
                 cardinality estimation is better than the empirical
                 methods. However, existing learning-based methods have
                 several limitations. Firstly, they focus on estimating
                 the cardinality, but cannot estimate the cost.
                 Secondly, they are either too heavy or hard to
                 represent complicated structures, e.g., complex
                 predicates. To address these challenges, we propose an
                 effective end-to-end learning-based cost estimation
                 framework based on a tree-structured model, which can
                 estimate both cost and cardinality simultaneously. We
                 propose effective feature extraction and encoding
                 techniques, which consider both queries and physical
                 operations in feature extraction. We embed these
                 features into our tree-structured model. We propose an
                 effective method to encode string values, which can
                 improve the generalization ability for predicate
                 matching. As it is prohibitively expensive to enumerate
                 all string values, we design a pattern-based method,
                 which selects patterns to cover string values and
                 utilizes the patterns to embed string values. We
                 conducted experiments on real-world datasets and
                 experimental results showed that our method
                 outperformed baselines.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zeng:2019:LMD,
  author =       "Yuxiang Zeng and Yongxin Tong and Lei Chen",
  title =        "Last-mile delivery made practical: an efficient route
                 planning framework with theoretical guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "320--333",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368297",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Last-mile delivery (LMD) refers to the movement of
                 goods from transportation origins to the final
                 destinations. It has widespread applications such as
                 urban logistics, e-commerce, etc. One fundamental
                 problem in last-mile delivery is route planning, which
                 schedules multiple couriers' routes, i.e., sequences of
                 origins and destinations of the requests under certain
                 optimization objectives. Prior studies usually designed
                 heuristic solutions to two strongly NP-hard
                 optimization objectives: minimizing the makespan
                 (i.e., maximum travel time) of couriers and total
                 latency (i.e., waiting time) of requesters. There is
                 no algorithm with theoretical guarantees for either
                 optimization objective in practical cases. In this
                 paper, we propose a theoretically guaranteed solution
                 framework for both objectives. It achieves both
                 approximation ratios of $ 6 \rho $, where $ \rho $ is
                 the approximation ratio of a core operation, called $k$
                 LMD, which plans for one courier a route consisting of
                 $k$ requests. Leveraging a spatial index called
                 hierarchically separated tree, we further design an
                 efficient approximation algorithm for $k$ LMD with $
                 \rho = O(\log n)$, where $n$ is the number of requests.
                 Experimental results show that our approach outperforms
                 state-of-the-art methods by averagely 48.4\%--96.0\%
                 and 49.7\%--96.1\% for both objectives. Especially in
                 large-scale real datasets, our algorithm has $ 29.3
                 \times $--$ 108.9 \times $ shorter makespan and $ 20.2
                 \times $--$ 175.1 \times $ lower total latency than the
                 state-of-the-art algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Kepe:2019:DPM,
  author =       "Tiago R. Kepe and Eduardo C. de Almeida and Marco A.
                 Z. Alves",
  title =        "Database processing-in-memory: an experimental study",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "334--347",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368298",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The rapid growth of ``big-data'' intensified the
                 problem of data movement when processing data
                 analytics: Large amounts of data need to move through
                 the memory up to the CPU before any computation takes
                 place. To tackle this costly problem,
                 Processing-in-Memory (PIM) inverts the traditional data
                 processing by pushing computation to memory with an
                 impact on performance and energy efficiency. In this
                 paper, we present an experimental study on processing
                 database SIMD operators in PIM compared to current x86
                 processor (i.e., using AVX512 instructions). We discuss
                 the execution time gap between those architectures.
                 However, this is the first experimental study, in the
                 database community, to discuss the trade-offs of
                 execution time and energy consumption between PIM and
                 x86 in the main query execution systems: materialized,
                 vectorized, and pipelined. We also discuss the results
                 of a hybrid query scheduling when interleaving the
                 execution of the SIMD operators between PIM and x86
                 processing hardware. In our results, the hybrid query
                 plan reduced the execution time by 45\%. It also
                 drastically reduced energy consumption by more than
                 $ 2 \times $ compared to hardware-specific query plans.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Leeka:2019:ISO,
  author =       "Jyoti Leeka and Kaushik Rajan",
  title =        "Incorporating super-operators in big-data query
                 optimizers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "348--361",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368299",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The cost of big-data analytics is dominated by shuffle
                 operations that induce multiple disk reads, writes and
                 network transfers. This paper proposes a new class of
                 optimization rules that are specifically aimed at
                 eliminating shuffles where possible. The rules
                 substitute multiple shuffle-inducing operators (Join,
                 UnionAll, Spool, GroupBy) with a single streaming
                 operator which implements an entire sub-query. We call
                 such operators super-operators. A key challenge with
                 adding new rules that substitute sub-queries with
                 super-operators is that there are many variants of the
                 same sub-query that can be implemented via minor
                 modifications to the same super-operator. Adding each
                 as a separate rule leads to a search space explosion.
                 We propose several extensions to the query optimizer to
                 address this challenge. We propose a new abstract
                 representation for operator trees that captures all
                 possible sub-queries that a super-operator implements.
                 We propose a new rule matching algorithm that can
                 efficiently search for abstract operator trees. Finally
                 we extend the physical operator interface to introduce
                 new parametric super-operators. We implement our
                 changes in SCOPE, a state-of-the-art production
                 big-data optimizer used extensively at Microsoft. We
                 demonstrate that the proposed optimizations provide
                 significant reduction in both resource cost (average
                 $ 1.7 \times $) and latency (average $ 1.5 \times $) on
                 several production
                 queries, and do so without increasing optimization
                 time.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Li:2019:EPM,
  author =       "Conggai Li and Fan Zhang and Ying Zhang and Lu Qin and
                 Wenjie Zhang and Xuemin Lin",
  title =        "Efficient progressive minimum $k$-core search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "362--375",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368300",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "As one of the most representative cohesive subgraph
                 models, $k$-core model has recently received
                 significant attention in the literature. In this paper,
                 we investigate the problem of the minimum $k$-core
                 search: given a graph $G$, an integer $k$ and a set of
                 query vertices $ Q = \{ q \} $, we aim to find the
                 smallest $k$-core subgraph containing every query
                 vertex $ q \in Q $. It has been shown that this
                 problem is NP-hard with a huge search space, and it is
                 very challenging to find the optimal solution. There
                 are several heuristic algorithms for this problem, but
                 they rely on simple scoring functions and there is no
                 guarantee as to the size of the resulting subgraph,
                 compared with the optimal solution. Our empirical study
                 also indicates that the size of their resulting
                 subgraphs may be large in practice. In this paper, we
                 develop an effective and efficient progressive
                 algorithm, namely PSA, to provide a good trade-off
                 between the quality of the result and the search time.
                 Novel lower and upper bound techniques for the minimum
                 $k$-core search are designed. Our extensive experiments
                 on 12 real-life graphs demonstrate the effectiveness
                 and efficiency of the new techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhu:2019:HNL,
  author =       "Hang Zhu and Zhihao Bai and Jialin Li and Ellis
                 Michael and Dan R. K. Ports and Ion Stoica and Xin
                 Jin",
  title =        "{Harmonia}: near-linear scalability for replicated
                 storage with in-network conflict detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "376--389",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368301",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Distributed storage employs replication to mask
                 failures and improve availability. However, these
                 systems typically exhibit a hard tradeoff between
                 consistency and performance. Ensuring consistency
                 introduces coordination overhead, and as a result the
                 system throughput does not scale with the number of
                 replicas. We present Harmonia, a replicated storage
                 architecture that exploits the capability of
                 new-generation programmable switches to obviate this
                 tradeoff by providing near-linear scalability without
                 sacrificing consistency. To achieve this goal, Harmonia
                 detects read-write conflicts in the network, which
                 enables any replica to serve reads for objects with no
                 pending writes. Harmonia implements this functionality
                 at line rate, thus imposing no performance overhead. We
                 have implemented a prototype of Harmonia on a cluster
                 of commodity servers connected by a Barefoot Tofino
                 switch, and have integrated it with Redis. We
                 demonstrate the generality of our approach by
                 supporting a variety of replication protocols,
                 including primary-backup, chain replication,
                 Viewstamped Replication, and NOPaxos. Experimental
                 results show that Harmonia improves the throughput of
                 these protocols by up to $ 10 \times $ for a
                 replication factor of $ 10 $, providing near-linear
                 scalability up to the limit of our testbed.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Walenz:2019:LSC,
  author =       "Brett Walenz and Stavros Sintos and Sudeepa Roy and
                 Jun Yang",
  title =        "Learning to sample: counting with complex queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "390--402",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368302",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We study the problem of efficiently estimating counts
                 for queries involving complex filters, such as
                 user-defined functions, or predicates involving
                 self-joins and correlated subqueries. For such queries,
                 traditional sampling techniques may not be applicable
                 due to the complexity of the filter preventing sampling
                 over joins, and sampling after the join may not be
                 feasible due to the cost of computing the full join.
                 The other natural approach of training and using an
                 inexpensive classifier to estimate the count instead of
                 the expensive predicate suffers from the difficulties
                 in training a good classifier and giving meaningful
                 confidence intervals. In this paper we propose a new
                 method of learning to sample where we combine the best
                 of both worlds by using sampling in two phases. First,
                 we use samples to learn a probabilistic classifier, and
                 then use the classifier to design a stratified sampling
                 method to obtain the final estimates. We theoretically
                 analyze algorithms for obtaining an optimal
                 stratification, and compare our approach with a suite
                 of natural alternatives like quantification learning,
                 weighted and stratified sampling, and other techniques
                 from the literature. We also provide extensive
                 experiments in diverse use cases using multiple real
                 and synthetic datasets to evaluate the quality,
                 efficiency, and robustness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Echihabi:2019:RLH,
  author =       "Karima Echihabi and Kostas Zoumpatianos and Themis
                 Palpanas and Houda Benbrahim",
  title =        "Return of the {Lernaean Hydra}: experimental
                 evaluation of data series approximate similarity
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "3",
  pages =        "403--420",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3368289.3368303",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Dec 11 07:51:13 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Data series are a special type of multidimensional
                 data present in numerous domains, where similarity
                 search is a key operation that has been extensively
                 studied in the data series literature. In parallel, the
                 multidimensional community has studied approximate
                 similarity search techniques. We propose a taxonomy of
                 similarity search techniques that reconciles the
                 terminology used in these two domains, we describe
                 modifications to data series indexing techniques
                 enabling them to answer approximate similarity queries
                 with quality guarantees, and we conduct a thorough
                 experimental evaluation to compare approximate
                 similarity search techniques under a unified framework,
                 on synthetic and real datasets in memory and on disk.
                 Although data series differ from generic
                 multidimensional vectors (series usually exhibit
                 correlation between neighboring values), our results
                 show that data series techniques answer approximate
                 queries with strong guarantees and an excellent
                 empirical performance, on data series and vectors
                 alike. These techniques outperform the state-of-the-art
                 approximate techniques for vectors when operating on
                 disk, and remain competitive in memory.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhou:2019:DDI,
  author =       "Xinjing Zhou and Lidan Shou and Ke Chen and Wei Hu and
                 Gang Chen",
  title =        "{DPTree}: differential indexing for persistent
                 memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "421--434",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372717",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The emergence of persistent memory (PM) spurs on
                 redesigns of database system components to gain full
                 exploitation of the persistence and speed of the
                 hardware. One crucial component studied by researchers
                 is persistent indices. However, such studies to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Karimov:2019:AAH,
  author =       "Jeyhun Karimov and Tilmann Rabl and Volker Markl",
  title =        "{AJoin}: ad-hoc stream joins at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "435--448",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372718",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The processing model of state-of-the-art stream
                 processing engines is designed to execute long-running
                 queries one at a time. However, with the advance of
                 cloud technologies and multi-tenant systems, multiple
                 users share the same cloud for stream query \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Luo:2019:PSL,
  author =       "Chen Luo and Michael J. Carey",
  title =        "On performance stability in {LSM}-based storage
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "449--462",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372719",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The Log-Structured Merge-Tree (LSM-tree) has been
                 widely adopted for use in modern NoSQL systems for its
                 superior write performance. Despite the popularity of
                 LSM-trees, they have been criticized for suffering from
                 write stalls and large performance \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Peng:2019:TBT,
  author =       "You Peng and Ying Zhang and Xuemin Lin and Wenjie
                 Zhang and Lu Qin and Jingren Zhou",
  title =        "Towards bridging theory and practice: hop-constrained
                 $s$--$t$ simple path enumeration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "463--476",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372720",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Graph is a ubiquitous structure representing entities
                 and their relationships applied in many areas such as
                 social networks, web graphs, and biological networks.
                 One of the fundamental tasks in graph analytics is to
                 investigate the relations between two \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Zhang:2019:PDS,
  author =       "Yuhao Zhang and Arun Kumar",
  title =        "{Panorama}: a data system for unbounded vocabulary
                 querying over video",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "477--491",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372721",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Deep convolutional neural networks (CNNs) achieve
                 state-of-the-art accuracy for many computer vision
                 tasks. But using them for video monitoring applications
                 incurs high computational cost and inference latency.
                 Thus, recent works have studied how to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

@Article{Lakhotia:2019:PTS,
  author =       "Kartik Lakhotia and Rajgopal Kannan and Qing Dong and
                 Viktor Prasanna",
  title =        "Planting trees for scalable and efficient canonical
                 hub labeling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "492--505",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372722",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Hub labeling is widely used to improve the latency and
                 throughput of Point-to-Point Shortest Distance (PPSD)
                 queries in graph databases. However, constructing hub
                 labeling, even via the state-of-the-art Pruned Landmark
                 Labeling (PLL) algorithm is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 13(4), pp. 506--518, December 2019: individual fairness via pairwise fair representations.
@Article{Lahoti:2019:OIF,
  author =       "Preethi Lahoti and Krishna P. Gummadi and Gerhard
                 Weikum",
  title =        "Operationalizing individual fairness with pairwise
                 fair representations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "506--518",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372723",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We revisit the notion of individual fairness proposed
                 by Dwork et al. A central challenge in operationalizing
                 their approach is the difficulty in eliciting a human
                 specification of a similarity metric. In this paper, we
                 propose an operationalization of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 13(4), pp. 519--532, December 2019: database optimization by learning hidden SSD parameters.
@Article{Kakaraparthy:2019:ODL,
  author =       "Aarati Kakaraparthy and Jignesh M. Patel and Kwanghyun
                 Park and Brian P. Kroth",
  title =        "Optimizing databases by learning hidden parameters of
                 solid state drives",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "519--532",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372724",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Solid State Drives (SSDs) are complex devices with
                 varying internal implementations, resulting in subtle
                 differences in behavior between devices. In this paper,
                 we demonstrate how a database engine can be optimized
                 for a particular device by learning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 13(4), pp. 533--546, December 2019: BlazeIt, declarative queries for NN-based video analytics.
@Article{Kang:2019:BOD,
  author =       "Daniel Kang and Peter Bailis and Matei Zaharia",
  title =        "{BlazeIt}: optimizing declarative aggregation and
                 limit queries for neural network-based video
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "533--546",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372725",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Recent advances in neural networks (NNs) have enabled
                 automatic querying of large volumes of video data with
                 high accuracy. While these deep NNs can produce
                 accurate annotations of an object's position and type
                 in video, they are computationally \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 13(4), pp. 547--560, December 2019: theory of joins on samples for AQP.
@Article{Huang:2019:JST,
  author =       "Dawei Huang and Dong Young Yoon and Seth Pettie and
                 Barzan Mozafari",
  title =        "Joins on samples: a theoretical guide for
                 practitioners",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "547--560",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372726",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Despite decades of research on AQP (approximate query
                 processing), our understanding of sample-based joins
                 has remained limited and, to some extent, even
                 superficial. The common belief in the community is that
                 joining random samples is futile. This \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 13(4), pp. 561--573, December 2019: mining an anti-knowledge base from Wikipedia updates.
@Article{Karagiannis:2019:MAK,
  author =       "Georgios Karagiannis and Immanuel Trummer and Saehan
                 Jo and Shubham Khandelwal and Xuezhi Wang and Cong Yu",
  title =        "Mining an ``anti-knowledge base'' from {Wikipedia}
                 updates with applications to fact checking and beyond",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "561--573",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372727",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "We introduce the problem of anti-knowledge mining. Our
                 goal is to create an {``anti-knowledge base''} that
                 contains factual mistakes. The resulting data can be
                 used for analysis, training, and benchmarking in the
                 research domain of automated fact checking. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 13(4), pp. 574--587, December 2019: evaluation of persistent-memory range indexes.
@Article{Lersch:2019:EPM,
  author =       "Lucas Lersch and Xiangpeng Hao and Ismail Oukid and
                 Tianzheng Wang and Thomas Willhalm",
  title =        "Evaluating persistent memory range indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "4",
  pages =        "574--587",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3372716.3372728",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jan 8 18:50:37 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "Persistent memory (PM) is fundamentally changing the
                 way database index structures are built by enabling
                 persistence, high performance, and (near) instant
                 recovery all on the memory bus. Prior work has proposed
                 many techniques to tailor index structure \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}

%%% PVLDB 13(5), pp. 588--601, January 2020: A.M.B.R.O.S.I.A, virtual resiliency for distributed applications.
@Article{Goldstein:2020:MBR,
  author =       "Jonathan Goldstein and Ahmed Abdelhamid and Mike
                 Barnett and Sebastian Burckhardt and Badrish
                 Chandramouli and Darren Gehring and Niel Lebeck and
                 Christopher Meiklejohn and Umar Farooq Minhas and Ryan
                 Newton and Rahee Ghosh Peshawaria and Tal Zaccai and
                 Irene Zhang",
  title =        "{A.M.B.R.O.S.I.A}: providing performant virtual
                 resiliency for distributed applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "588--601",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377370",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377370",
  abstract =     "When writing today's distributed programs, which
                 frequently span both devices and cloud services,
                 programmers are faced with complex decisions and coding
                 tasks around coping with failure, especially when these
                 distributed components are stateful. If \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 602--615, January 2020: shortest-path index maintenance on dynamic road networks.
@Article{Ouyang:2020:ESP,
  author =       "Dian Ouyang and Long Yuan and Lu Qin and Lijun Chang
                 and Ying Zhang and Xuemin Lin",
  title =        "Efficient shortest path index maintenance on dynamic
                 road networks with theoretical guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "602--615",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377371",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377371",
  abstract =     "Computing the shortest path between two vertices is a
                 fundamental problem in road networks that is applied in
                 a wide variety of applications. To support efficient
                 shortest path query processing, a plethora of
                 index-based methods have been proposed in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 616--628, January 2020: ParPaRaw, massively parallel parsing of delimiter-separated data.
@Article{Stehle:2020:PMP,
  author =       "Elias Stehle and Hans-Arno Jacobsen",
  title =        "{ParPaRaw}: massively parallel parsing of
                 delimiter-separated raw data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "616--628",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377372",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377372",
  abstract =     "Parsing is essential for a wide range of use cases,
                 such as stream processing, bulk loading, and in-situ
                 querying of raw data. Yet, the compute-intense step
                 often constitutes a major bottleneck in the data
                 ingestion pipeline, since parsing of inputs \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 629--642, January 2020: optimism in contended main-memory multicore transactions.
@Article{Huang:2020:OOC,
  author =       "Yihe Huang and William Qian and Eddie Kohler and
                 Barbara Liskov and Liuba Shrira",
  title =        "Opportunities for optimism in contended main-memory
                 multicore transactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "629--642",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377373",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377373",
  abstract =     "Optimistic concurrency control, or OCC, can achieve
                 excellent performance on uncontended workloads for
                 main-memory transactional databases. Contention causes
                 OCC's performance to degrade, however, and recent
                 concurrency control designs, such as hybrid \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 643--655, January 2020: PM-LSH framework for approximate NN search.
@Article{Zheng:2020:PLF,
  author =       "Bolong Zheng and Xi Zhao and Lianggui Weng and Nguyen
                 Quoc Viet Hung and Hang Liu and Christian S. Jensen",
  title =        "{PM-LSH}: a fast and accurate {LSH} framework for
                 high-dimensional approximate {NN} search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "643--655",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377374",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377374",
  abstract =     "Nearest neighbor (NN) search in high-dimensional
                 spaces is inherently computationally expensive due to
                 the curse of dimensionality. As a well-known solution
                 to approximate NN search, locality-sensitive hashing
                 (LSH) is able to answer c-approximate NN (c-\ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 656--669, January 2020: hunting multiple bumps in graphs.
@Article{Sun:2020:HMB,
  author =       "Yahui Sun and Jun Luo and Theodoros Lappas and Xiaokui
                 Xiao and Bin Cui",
  title =        "Hunting multiple bumps in graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "656--669",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377375",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377375",
  abstract =     "Bump hunting is an important approach to the
                 extraction of insights from Euclidean datasets.
                 Recently, it has been explored for graph datasets for
                 the first time, and a single bump is hunted in an
                 unweighted graph in this exploration. Here, we extend
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 670--683, January 2020: homogeneous network embedding via reweighted personalized PageRank.
@Article{Yang:2020:HNE,
  author =       "Renchi Yang and Jieming Shi and Xiaokui Xiao and Yin
                 Yang and Sourav S. Bhowmick",
  title =        "Homogeneous network embedding for massive graphs via
                 reweighted personalized {PageRank}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "670--683",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377376",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377376",
  abstract =     "Given an input graph G and a node $ v \in G $,
                 homogeneous network embedding (HNE) maps the graph
                 structure in the vicinity of $v$ to a compact,
                 fixed-dimensional feature vector. This paper focuses on
                 HNE for massive graphs, e.g., with billions of edges.
                 On \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 684--697, January 2020: pattern functional dependencies for data cleaning.
@Article{Qahtan:2020:PFD,
  author =       "Abdulhakim Qahtan and Nan Tang and Mourad Ouzzani and
                 Yang Cao and Michael Stonebraker",
  title =        "Pattern functional dependencies for data cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "684--697",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377377",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377377",
  abstract =     "Patterns (or regex-based expressions) are widely used
                 to constrain the format of a domain (or a column),
                 e.g., a Year column should contain only four digits,
                 and thus a value like ``1980-'' might be a typo.
                 Moreover, integrity constraints (ICs) defined
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 698--711, January 2020: MEGA, multi-view semi-supervised clustering of hypergraphs.
@Article{Whang:2020:MMV,
  author =       "Joyce Jiyoung Whang and Rundong Du and Sangwon Jung
                 and Geon Lee and Barry Drake and Qingqing Liu and
                 Seonggoo Kang and Haesun Park",
  title =        "{MEGA}: multi-view semi-supervised clustering of
                 hypergraphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "698--711",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377378",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377378",
  abstract =     "Complex relationships among entities can be modeled
                 very effectively using hypergraphs. Hypergraphs model
                 real-world data by allowing a hyperedge to include two
                 or more entities. Clustering of hypergraphs enables us
                 to group the similar entities \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 712--725, January 2020: MDedup, duplicate detection with matching dependencies.
@Article{Koumarelas:2020:MDD,
  author =       "Ioannis Koumarelas and Thorsten Papenbrock and Felix
                 Naumann",
  title =        "{MDedup}: duplicate detection with matching
                 dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "712--725",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377379",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377379",
  abstract =     "Duplicate detection is an integral part of data
                 cleaning and serves to identify multiple
                 representations of same real-world entities in
                 (relational) datasets. Existing duplicate detection
                 approaches are effective, but they are also hard to
                 parameterize \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 726--739, January 2020: programmable view update strategies on relations.
@Article{Tran:2020:PVU,
  author =       "Van-Dang Tran and Hiroyuki Kato and Zhenjiang Hu",
  title =        "Programmable view update strategies on relations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "726--739",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377380",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377380",
  abstract =     "View update is an important mechanism that allows
                 updates on a view by translating them into the
                 corresponding updates on the base relations. The
                 existing literature has shown the ambiguity of
                 translating view updates. To address this ambiguity, we
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 740--753, January 2020: Amber, a debuggable dataflow system on the actor model.
@Article{Kumar:2020:ADD,
  author =       "Avinash Kumar and Zuozhi Wang and Shengquan Ni and
                 Chen Li",
  title =        "{Amber}: a debuggable dataflow system based on the
                 actor model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "740--753",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377381",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377381",
  abstract =     "A long-running analytic task on big data often leaves
                 a developer in the dark without providing valuable
                 feedback about the status of the execution. In
                 addition, a failed job that needs to restart from
                 scratch can waste earlier computing resources. An
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 754--767, January 2020: speculative optimizations for SQL compilation in Apache Spark.
@Article{Schiavio:2020:DSO,
  author =       "Filippo Schiavio and Daniele Bonetta and Walter
                 Binder",
  title =        "Dynamic speculative optimizations for {SQL}
                 compilation in {Apache Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "754--767",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377382",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377382",
  abstract =     "Big-data systems have gained significant momentum, and
                 Apache Spark is becoming a de-facto standard for modern
                 data analytics. Spark relies on SQL query compilation
                 to optimize the execution performance of analytical
                 workloads on a variety of data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(5), pp. 768--782, January 2020: experimental evaluation of missing-value imputation in time series.
@Article{Khayati:2020:MGE,
  author =       "Mourad Khayati and Alberto Lerner and Zakhar Tymchenko
                 and Philippe Cudr{\'e}-Mauroux",
  title =        "Mind the gap: an experimental evaluation of imputation
                 of missing values techniques in time series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "5",
  pages =        "768--782",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3377369.3377383",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:27 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3377369.3377383",
  abstract =     "Recording sensor data is seldom a perfect process.
                 Failures in power, communication or storage can leave
                 occasional blocks of data missing, affecting not only
                 real-time monitoring but also compromising the quality
                 of near- and off-line data analysis. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(6), pp. 783--797, February 2020: Graphite, NUMA-aware HPC graph analytics with MPI * X.
@Article{Mofrad:2020:GNA,
  author =       "Mohammad Hasanzadeh Mofrad and Rami Melhem and Yousuf
                 Ahmad and Mohammad Hammoud",
  title =        "{Graphite}: a {NUMA}-aware {HPC} system for graph
                 analytics based on a new {MPI * X} parallelism model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "783--797",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380751",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380751",
  abstract =     "In this paper, we propose a new parallelism model
                 denoted as MPI * X and suggest a linear algebra-based
                 graph analytics system, namely, Graphite, which
                 effectively employs it. MPI * X promotes thread-based
                 partitioning to distribute computation and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(6), pp. 798--811, February 2020: insights for altering decisions of tree-based ensembles.
@Article{Boer:2020:PIA,
  author =       "Naama Boer and Daniel Deutch and Nave Frost and Tova
                 Milo",
  title =        "Personal insights for altering decisions of tree-based
                 ensembles over time",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "798--811",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380752",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380752",
  abstract =     "Machine Learning models are prevalent in critical
                 human-related decision making, such as resume filtering
                 and loan applications. Refused individuals naturally
                 ask what could change the decision, should they
                 reapply. This question is hard for the model \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(6), pp. 812--825, February 2020: billion-scale label-constrained reachability queries.
@Article{Peng:2020:ABS,
  author =       "You Peng and Ying Zhang and Xuemin Lin and Lu Qin and
                 Wenjie Zhang",
  title =        "Answering billion-scale label-constrained reachability
                 queries within microsecond",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "812--825",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380753",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380753",
  abstract =     "In this paper, we study the problem of
                 label-constrained reachability (LCR) query which is
                 fundamental in many applications with directed
                 edge-label graphs. Although the classical reachability
                 query (i.e., reachability query without label
                 constraint) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(6), pp. 826--839, February 2020: retrieval of structured entities.
@Article{Huang:2020:EER,
  author =       "Ruihong Huang and Shaoxu Song and Yunsu Lee and Jungho
                 Park and Soo-Hyung Kim and Sungmin Yi",
  title =        "Effective and efficient retrieval of structured
                 entities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "826--839",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380754",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380754",
  abstract =     "Structured entities are commonly abstracted, such as
                 from XML, RDF or hidden-web databases. Direct retrieval
                 of various structured entities is highly demanded in
                 data lakes, e.g., given a JSON object, to find the XML
                 entities that denote the same real-. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 13(6), pp. 840--853, February 2020: micro-architectural analysis of OLAP.
@Article{Sirin:2020:MAA,
  author =       "Utku Sirin and Anastasia Ailamaki",
  title =        "Micro-architectural analysis of {OLAP}: limitations
                 and opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "840--853",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380755",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380755",
  abstract =     "Understanding micro-architectural behavior is
                 important for efficiently using hardware resources.
                 Recent work has shown that in-memory online transaction
                 processing (OLTP) systems severely underutilize their
                 core micro-architecture resources [29]. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fang:2020:EEC,
  author =       "Yixiang Fang and Yixing Yang and Wenjie Zhang and
                 Xuemin Lin and Xin Cao",
  title =        "Effective and efficient community search over large
                 heterogeneous information networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "854--867",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380756",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380756",
  abstract =     "Recently, the topic of community search (CS) has
                 gained plenty of attention. Given a query vertex, CS
                 looks for a dense subgraph that contains it. Existing
                 studies mainly focus on homogeneous graphs in which
                 vertices are of the same type, and cannot be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gupta:2020:RGS,
  author =       "Suyash Gupta and Sajjad Rahnama and Jelle Hellings and
                 Mohammad Sadoghi",
  title =        "{ResilientDB}: global scale resilient blockchain
                 fabric",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "868--883",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380757",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380757",
  abstract =     "Recent developments in blockchain technology have
                 inspired innovative new designs in resilient
                 distributed and database systems. At their core, these
                 blockchain applications typically use Byzantine
                 fault-tolerant consensus protocols to maintain a common
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Funke:2020:DPQ,
  author =       "Henning Funke and Jens Teubner",
  title =        "Data-parallel query processing on non-uniform data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "884--897",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380758",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380758",
  abstract =     "Graphics processing units (GPUs) promise spectacular
                 performance advantages when used as database
                 coprocessors. Their massive compute capacity, however,
                 is often hampered by control flow divergence caused by
                 non-uniform data distributions. When data-. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Feng:2020:EMH,
  author =       "Zonghao Feng and Qiong Luo",
  title =        "Evaluating memory-hard proof-of-work algorithms on
                 three processors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "898--911",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380759",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380759",
  abstract =     "Most public blockchain systems, exemplified by
                 cryptocurrencies such as Ethereum and Monero, use
                 memory-hard proof-of-work (PoW) algorithms in consensus
                 protocols to maintain fair participation without a
                 trusted third party. The memory hardness, or the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2020:ASW,
  author =       "Seokki Lee and Bertram Lud{\"a}scher and Boris
                 Glavic",
  title =        "Approximate summaries for why and why-not provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "912--924",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380760",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380760",
  abstract =     "Why and why-not provenance have been studied
                 extensively in recent years. However, why-not
                 provenance and --- to a lesser degree --- why
                 provenance can be very large, resulting in severe
                 scalability and usability challenges. We introduce a
                 novel \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jiang:2020:PAD,
  author =       "Hao Jiang and Chunwei Liu and Qi Jin and John
                 Paparrizos and Aaron J. Elmore",
  title =        "{PIDS}: attribute decomposition for improved
                 compression and query performance in columnar storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "925--938",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380761",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380761",
  abstract =     "We propose PIDS, Pattern Inference Decomposed Storage,
                 an innovative storage method for decomposing string
                 attributes in columnar stores. Using an unsupervised
                 approach, PIDS identifies common patterns in string
                 attributes from relational databases, and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Asudeh:2020:DCP,
  author =       "Abolfazl Asudeh and H. V. Jagadish and You (Will) Wu
                 and Cong Yu",
  title =        "On detecting cherry-picked trendlines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "6",
  pages =        "939--952",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3380750.3380762",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Apr 2 10:51:28 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3380750.3380762",
  abstract =     "Poorly supported stories can be told based on data by
                 cherry-picking the data points included. While such
                 stories may be technically accurate, they are
                 misleading. In this paper, we build a system for
                 detecting cherry-picking, with a focus on trendlines
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ota:2020:DDD,
  author =       "Masayo Ota and Heiko M{\"u}ller and Juliana Freire and
                 Divesh Srivastava",
  title =        "Data-driven domain discovery for structured datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "953--967",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384346",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384346",
  abstract =     "The growing number of open datasets has created new
                 opportunities to derive insights and address important
                 societal problems. These data, however, often come with
                 little or no metadata, in particular about the types of
                 their attributes, thus greatly \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shi:2020:RIF,
  author =       "Jieming Shi and Tianyuan Jin and Renchi Yang and
                 Xiaokui Xiao and Yin Yang",
  title =        "Realtime index-free single source {SimRank} processing
                 on web-scale graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "966--980",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384347",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384347",
  abstract =     "Given a graph $G$ and a node $ u \in G$, a single
                 source SimRank query evaluates the similarity between
                 $u$ and every node $ v \in G$. Existing approaches to
                 single source SimRank computation incur either long
                 query response time, or expensive pre-computation,
                 which \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:DAR,
  author =       "Jiachuan Wang and Peng Cheng and Libin Zheng and Chao
                 Feng and Lei Chen and Xuemin Lin and Zheng Wang",
  title =        "Demand-aware route planning for shared mobility
                 services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "979--991",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384348",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384348",
  abstract =     "The dramatic development of shared mobility in food
                 delivery, ridesharing, and crowdsourced parcel delivery
                 has drawn great concerns. Specifically, shared mobility
                 refers to transferring or delivering more than one
                 passenger/package together when their \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hilprecht:2020:DLD,
  author =       "Benjamin Hilprecht and Andreas Schmidt and Moritz
                 Kulessa and Alejandro Molina and Kristian Kersting and
                 Carsten Binnig",
  title =        "{DeepDB}: learn from data, not from queries!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "992--1005",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384349",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384349",
  abstract =     "The typical approach for learned DBMS components is to
                 capture the behavior by running a representative set of
                 queries and use the observations to train a machine
                 learning model. This workload-driven approach, however,
                 has two major downsides. First, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:DMU,
  author =       "Yuepeng Wang and Rushi Shah and Abby Criswell and Rong
                 Pan and Isil Dillig",
  title =        "Data migration using datalog program synthesis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1006--1019",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384350",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384350",
  abstract =     "This paper presents a new technique for migrating data
                 between different schemas. Our method expresses the
                 schema mapping as a Datalog program and automatically
                 synthesizes a Datalog program from simple input-output
                 examples to perform data migration. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2020:LTG,
  author =       "Xiaowei Zhu and Guanyu Feng and Marco Serafini and
                 Xiaosong Ma and Jiping Yu and Lei Xie and Ashraf
                 Aboulnaga and Wenguang Chen",
  title =        "{LiveGraph}: a transactional graph storage system with
                 purely sequential adjacency list scans",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1020--1034",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384351",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384351",
  abstract =     "The specific characteristics of graph workloads make
                 it hard to design a one-size-fits-all graph storage
                 system. Systems that support transactional updates use
                 data structures with poor data locality, which limits
                 the efficiency of analytical workloads \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2020:KKB,
  author =       "Xueling Lin and Haoyang Li and Hao Xin and Zijian Li
                 and Lei Chen",
  title =        "{KBPearl}: a knowledge base population system
                 supported by joint entity and relation linking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1035--1049",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384352",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384352",
  abstract =     "Nowadays, most openly available knowledge bases (KBs)
                 are incomplete, since they are not synchronized with
                 the emerging facts happening in the real world.
                 Therefore, knowledge base population (KBP) from
                 external data sources, which extracts knowledge
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:CUT,
  author =       "Tianyi Li and Ruikai Huang and Lu Chen and Christian
                 S. Jensen and Torben Bach Pedersen",
  title =        "Compression of uncertain trajectories in road
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1050--1063",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384353",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384353",
  abstract =     "Massive volumes of uncertain trajectory data are being
                 generated by GPS devices. Due to the limitations of GPS
                 data, these trajectories are generally uncertain. This
                 state of affairs renders it is attractive to be able to
                 compress uncertain trajectories \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shastri:2020:UBI,
  author =       "Supreeth Shastri and Vinay Banakar and Melissa
                 Wasserman and Arun Kumar and Vijay Chidambaram",
  title =        "Understanding and benchmarking the impact of {GDPR} on
                 database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1064--1077",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384354",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384354",
  abstract =     "The General Data Protection Regulation (GDPR) provides
                 new rights and protections to European people
                 concerning their personal data. We analyze GDPR from a
                 systems perspective, translating its legal articles
                 into a set of capabilities and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2020:LOP,
  author =       "Jihang Liu and Shimin Chen and Lujun Wang",
  title =        "{LB+-Trees}: optimizing persistent index performance on
                 {$3$DXPoint} memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1078--1090",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384355",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384355",
  abstract =     "3DXPoint memory is the first commercially available
                 NVM solution targeting mainstream computer systems.
                 While 3DXPoint conforms to many assumptions about NVM
                 in previous studies, we observe a number of distinctive
                 features of 3DXPoint. For example, the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lersch:2020:ELT,
  author =       "Lucas Lersch and Ivan Schreter and Ismail Oukid and
                 Wolfgang Lehner",
  title =        "Enabling low tail latency on multicore key-value
                 stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1091--1104",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384356",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384356",
  abstract =     "Modern applications employ key-value stores (KVS) in
                 at least some point of their software stack, often as a
                 caching system or a storage manager. Many of these
                 applications also require a high degree of
                 responsiveness and performance predictability.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2020:PAA,
  author =       "Chunbin Lin and Etienne Boursier and Yannis
                 Papakonstantinou",
  title =        "{Plato}: approximate analytics over compressed time
                 series with tight deterministic error guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1105--1118",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384357",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384357",
  abstract =     "Plato provides fast approximate analytics on time
                 series, by precomputing and storing compressed time
                 series. Plato's key novelty is the delivery of tight
                 deterministic error guarantees for the linear algebra
                 operators over vectors\slash time series, the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gera:2020:TLG,
  author =       "Prasun Gera and Hyojong Kim and Piyush Sao and Hyesoon
                 Kim and David Bader",
  title =        "Traversing large graphs on {GPUs} with unified
                 memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1119--1133",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384358",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384358",
  abstract =     "Due to the limited capacity of GPU memory, the
                 majority of prior work on graph applications on GPUs
                 has been restricted to graphs of modest sizes that fit
                 in memory. Recent hardware and software advances make
                 it possible to address much larger host \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ping:2020:SHQ,
  author =       "Haoyue Ping and Julia Stoyanovich and Benny
                 Kimelfeld",
  title =        "Supporting hard queries over probabilistic
                 preferences",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "7",
  pages =        "1134--1146",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3384345.3384359",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:13 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3384345.3384359",
  abstract =     "Preference analysis is widely applied in various
                 domains such as social choice and e-commerce. A
                 recently proposed framework augments the relational
                 database with a preference relation that represents
                 uncertain preferences in the form of statistical
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2020:DSH,
  author =       "Baotong Lu and Xiangpeng Hao and Tianzheng Wang and
                 Eric Lo",
  title =        "{Dash}: scalable hashing on persistent memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1147--1161",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389134",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389134",
  abstract =     "Byte-addressable persistent memory (PM) brings hash
                 tables the potential of low latency, cheap persistence
                 and instant recovery. The recent advent of Intel Optane
                 DC Persistent Memory Modules (DCPMM) further
                 accelerates this trend. Many new hash table designs
                 have been proposed, but most of them were based on
                 emulation and perform sub-optimally on real PM. They
                 were also piece-wise and partial solutions that
                 side-step many important properties, in particular good
                 scalability, high load factor and instant
                 recovery.\par

                 We present Dash, a holistic approach to building
                 dynamic and scalable hash tables on real PM hardware
                 with all the aforementioned properties. Based on Dash,
                 we adapted two popular dynamic hashing schemes
                 (extendible hashing and linear hashing). On a 24-core
                 machine with Intel Optane DCPMM, we show that compared
                 to state-of-the-art, Dash-enabled hash tables can
                 achieve up to $ \approx 3.9 \times $ higher performance
                  with up to over 90\% load factor and an instant recovery
                 time of 57ms regardless of data size.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ferragina:2020:PIF,
  author =       "Paolo Ferragina and Giorgio Vinciguerra",
  title =        "The {PGM-index}: a fully-dynamic compressed learned
                 index with provable worst-case bounds",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1162--1175",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389135",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389135",
  abstract =     "We present the first learned index that supports
                 predecessor, range queries and updates within provably
                 efficient time and space bounds in the worst case. In
                 the (static) context of just predecessor and range
                 queries these bounds turn out to be optimal. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ma:2020:DRC,
  author =       "Minghua Ma and Zheng Yin and Shenglin Zhang and Sheng
                 Wang and Christopher Zheng and Xinhao Jiang and Hanwen
                 Hu and Cheng Luo and Yilin Li and Nengjun Qiu and
                 Feifei Li and Changcheng Chen and Dan Pei",
  title =        "Diagnosing root causes of intermittent slow queries in
                 cloud databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1176--1189",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389136",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389136",
  abstract =     "With the growing market of cloud databases, careful
                 detection and elimination of slow queries are of great
                 importance to service stability. Previous studies focus
                 on optimizing the slow queries that result from
                 internal reasons (e.g., poorly-written \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2020:PEF,
  author =       "Xuhao Chen and Roshan Dathathri and Gurbinder Gill and
                 Keshav Pingali",
  title =        "{Pangolin}: an efficient and flexible graph mining
                 system on {CPU} and {GPU}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1190--1205",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389137",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389137",
  abstract =     "There is growing interest in graph pattern mining
                 (GPM) problems such as motif counting. GPM systems have
                 been developed to provide unified interfaces for
                 programming algorithms for these problems and for
                 running them on parallel systems. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dreseler:2020:QTH,
  author =       "Markus Dreseler and Martin Boissier and Tilmann Rabl
                 and Matthias Uflacker",
  title =        "Quantifying {TPC-H} choke points and their
                 optimizations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1206--1220",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389138",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389138",
  abstract =     "TPC-H continues to be the most widely used benchmark
                 for relational OLAP systems. It poses a number of
                 challenges, also known as ``choke points'', which
                 database systems have to solve in order to achieve good
                 benchmark results. Examples include joins \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:EAC,
  author =       "Yuanbing Li and Xian Wu and Yifei Jin and Jian Li and
                 Guoliang Li",
  title =        "Efficient algorithms for crowd-aided categorization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1221--1233",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389139",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389139",
  abstract =     "We study the problem of utilizing human intelligence
                 to categorize a large number of objects. In this
                 problem, given a category hierarchy and a set of
                 objects, we can ask humans to check whether an object
                 belongs to a category, and our goal is to find
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:SVD,
  author =       "Shaowei Wang and Yuqiu Qian and Jiachun Du and Wei
                 Yang and Liusheng Huang and Hongli Xu",
  title =        "Set-valued data publication with local privacy: tight
                 error bounds and efficient mechanisms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1234--1247",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389140",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389140",
  abstract =     "Most user-generated data in online services are
                 presented as set-valued data, e.g., visited website
                 URLs, recently used Apps by a person, and etc. These
                 data are of great value to service providers, but also
                 bring privacy concerns if collected and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fegaras:2020:TAB,
  author =       "Leonidas Fegaras and Hasanuzzaman Noor",
  title =        "Translation of array-based loops to distributed
                 data-parallel programs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1248--1260",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389141",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389141",
  abstract =     "Large volumes of data generated by scientific
                 experiments and simulations come in the form of arrays,
                 while programs that analyze these data are frequently
                 expressed in terms of array operations in an
                 imperative, loop-based language. But, as datasets
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2020:IGP,
  author =       "Wenfei Fan and Muyang Liu and Chao Tian and Ruiqi Xu
                 and Jingren Zhou",
  title =        "Incrementalization of graph partitioning algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1261--1274",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389142",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389142",
  abstract =     "This paper studies incremental graph partitioning.
                 Given a (vertex-cut or edge-cut) partition $ C(G) $ of
                 a graph $G$ and updates $ \Delta G$ to $G$, it is to
                 compute changes $ \Delta O$ to $ C(G)$, yielding a
                 partition of the updated graph such that (a) the new
                 partition is load-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ko:2020:OIS,
  author =       "Shao-Heng Ko and Hsu-Chao Lai and Hong-Han Shuai and
                 Wang-Chien Lee and Philip S. Yu and De-Nian Yang",
  title =        "Optimizing item and subgroup configurations for
                 social-aware {VR} shopping",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1275--1289",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389143",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389143",
  abstract =     "Shopping in VR malls has been regarded as a paradigm
                 shift for E-commerce, but most of the conventional VR
                 shopping platforms are designed for a single user. In
                 this paper, we envisage a scenario of VR group
                 shopping, which brings major advantages over \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Savvides:2020:ECP,
  author =       "Savvas Savvides and Darshika Khandelwal and Patrick
                 Eugster",
  title =        "Efficient confidentiality-preserving data analytics
                 over symmetrically encrypted datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1290--1303",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389144",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389144",
  abstract =     "In the past decade, cloud computing has emerged as an
                 economical and practical alternative to in-house
                 datacenters. But due to security concerns, many
                 enterprises are still averse to adopting third party
                 clouds. To mitigate these concerns, several \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gill:2020:SMG,
  author =       "Gurbinder Gill and Roshan Dathathri and Loc Hoang and
                 Ramesh Peri and Keshav Pingali",
  title =        "Single machine graph analytics on massive datasets
                 using {Intel Optane DC Persistent Memory}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "8",
  pages =        "1304--1318",
  month =        apr,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3389133.3389145",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 5 14:01:14 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3389133.3389145",
  abstract =     "Intel Optane DC Persistent Memory (Optane PMM) is a
                 new kind of byte-addressable memory with higher density
                 and lower cost than DRAM. This enables the design of
                 affordable systems that support up to 6TB of randomly
                 accessible memory. In this paper, we \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zakhary:2020:ACA,
  author =       "Victor Zakhary and Divyakant Agrawal and Amr {El
                 Abbadi}",
  title =        "Atomic commitment across blockchains",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1319--1331",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397231",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397231",
  abstract =     "The recent adoption of blockchain technologies and
                 open permissionless networks suggest the importance of
                 peer-to-peer atomic cross-chain transaction protocols.
                 Users should be able to atomically exchange tokens and
                 assets without depending on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mathew:2020:HSM,
  author =       "Ajit Mathew and Changwoo Min",
  title =        "{HydraList}: a scalable in-memory index using
                 asynchronous updates and partial replication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1332--1345",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397232",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397232",
  abstract =     "Increased capacity of main memory has led to the rise
                 of in-memory databases. With disk access eliminated,
                 efficiency of index structures has become critical for
                 performance in these systems. An ideal index structure
                 should exhibit high performance for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Davis:2020:EMP,
  author =       "A. Jesse Jiryu Davis and Max Hirschhorn and Judah
                 Schvimer",
  title =        "Extreme modelling in practice",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1346--1358",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397233",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397233",
  abstract =     "Formal modelling is a powerful tool for developing
                 complex systems. At MongoDB, we use TLA$^+$ to model
                 and verify multiple aspects of several systems.
                 Ensuring conformance between a specification and its
                 implementation can add value to any specification;
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lyu:2020:MBS,
  author =       "Bingqing Lyu and Lu Qin and Xuemin Lin and Ying Zhang
                 and Zhengping Qian and Jingren Zhou",
  title =        "Maximum biclique search at billion scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1359--1372",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397234",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397234",
  abstract =     "Maximum biclique search, which finds the biclique with
                 the maximum number of edges in a bipartite graph, is a
                 fundamental problem with a wide spectrum of
                 applications in different domains, such as E-Commerce,
                 social analysis, web services, and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chepurko:2020:AAR,
  author =       "Nadiia Chepurko and Ryan Marcus and Emanuel Zgraggen
                 and Raul Castro Fernandez and Tim Kraska and David
                 Karger",
  title =        "{ARDA}: automatic relational data augmentation for
                 machine learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1373--1387",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397235",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397235",
  abstract =     "Automatic machine learning (AML) is a family of
                 techniques to automate the process of training
                 predictive models, aiming to both improve performance
                 and make machine learning more accessible. While many
                 recent works have focused on aspects of the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Alkowaileet:2020:LBT,
  author =       "Wail Y. Alkowaileet and Sattam Alsubaiee and Michael
                 J. Carey",
  title =        "An {LSM}-based tuple compaction framework for {Apache
                 AsterixDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1388--1400",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397236",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397236",
  abstract =     "Document database systems store self-describing
                 semi-structured records, such as JSON, ``as-is''
                 without requiring the users to pre-define a schema.
                 This provides users with the flexibility to change the
                 structure of incoming records without worrying
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shraga:2020:ACD,
  author =       "Roee Shraga and Avigdor Gal and Haggai Roitman",
  title =        "{ADnEV}: cross-domain schema matching using deep
                 similarity matrix adjustment and evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1401--1415",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397237",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397237",
  abstract =     "Schema matching is a process that serves in
                 integrating structured and semi-structured data. Being
                 a handy tool in multiple contemporary business and
                 commerce applications, it has been investigated in the
                 fields of databases, AI, Semantic Web, and data
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2020:QPP,
  author =       "Xuanhe Zhou and Ji Sun and Guoliang Li and Jianhua
                 Feng",
  title =        "Query performance prediction for concurrent queries
                 using graph embedding",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1416--1428",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397238",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397238",
  abstract =     "Query performance prediction is vital to many database
                 tasks (e.g., database monitoring and query scheduling).
                 Existing methods focus on predicting the performance
                 for a single query but cannot effectively predict the
                 performance for concurrent queries, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Alquraan:2020:SNZ,
  author =       "Ahmed Alquraan and Alex Kogan and Virendra J. Marathe
                 and Samer Al-Kiswany",
  title =        "Scalable, near-zero loss disaster recovery for
                 distributed data stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1429--1442",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397239",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397239",
  abstract =     "This paper presents a new Disaster Recovery (DR)
                 system, called Slogger, that differs from prior works
                 in two principle ways: (i) Slogger enables DR for a
                 linearizable distributed data store, and (ii) Slogger
                 adopts the continuous backup approach that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2020:VAN,
  author =       "Kejing Lu and Hongya Wang and Wei Wang and Mineichi
                 Kudo",
  title =        "{VHP}: approximate nearest neighbor search via virtual
                 hypersphere partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1443--1455",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397240",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397240",
  abstract =     "Locality sensitive hashing (LSH) is a widely practiced
                 $c$-approximate nearest neighbor ($c$-ANN) search
                 algorithm in high dimensional spaces. The
                 state-of-the-art LSH based algorithm searches an
                 unbounded and irregular space to identify candidates,
                 which \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kim:2020:IFS,
  author =       "Hyunjoon Kim and Seunghwan Min and Kunsoo Park and
                 Xuemin Lin and Seok-Hee Hong and Wook-Shin Han",
  title =        "{IDAR}: fast supergraph search using {DAG}
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1456--1468",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397241",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397241",
  abstract =     "Supergraph search is one of fundamental graph query
                 processing problems in many application domains. Given
                 a query graph and a set of data graphs, supergraph
                 search is to find all the data graphs contained in the
                 query graph as subgraphs. In existing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Seleznova:2020:GEU,
  author =       "Mariia Seleznova and Behrooz Omidvar-Tehrani and Sihem
                 Amer-Yahia and Eric Simon",
  title =        "Guided exploration of user groups",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1469--1482",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397242",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397242",
  abstract =     "Finding a set of users of interest serves several
                 applications in behavioral analytics. Often times,
                 identifying users requires to explore the data and
                 gradually choose potential targets. This is a special
                 case of Exploratory Data Analysis (EDA), an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gong:2020:IID,
  author =       "Long Gong and Huayi Wang and Mitsunori Ogihara and Jun
                 Xu",
  title =        "{iDEC}: indexable distance estimating codes for
                 approximate nearest neighbor search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1483--1497",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397243",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397243",
  abstract =     "Approximate Nearest Neighbor (ANN) search is a
                 fundamental algorithmic problem, with numerous
                 applications in many areas of computer science. In this
                 work, we propose indexable distance estimating codes
                 (iDEC), a new solution framework to ANN that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bian:2020:EAB,
  author =       "Song Bian and Qintian Guo and Sibo Wang and Jeffrey Xu
                 Yu",
  title =        "Efficient algorithms for budgeted influence
                 maximization on massive social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1498--1510",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397244",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397244",
  abstract =     "Given a social network $G$, a cost associated with each
                 node, and a budget $B$, the budgeted influence
                 maximization (BIM) problem aims to find a set $S$ of
                 nodes, denoted as the seed set, that maximizes the
                 expected number of influenced users under the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Prateek:2020:MTK,
  author =       "Arneish Prateek and Arijit Khan and Akshit Goyal and
                 Sayan Ranu",
  title =        "Mining Top-$k$ pairs of correlated subgraphs in a
                 large network",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1511--1524",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397245",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397245",
  abstract =     "We investigate the problem of correlated subgraphs
                 mining (CSM) where the goal is to identify pairs of
                 subgraph patterns that frequently co-occur in proximity
                 within a single graph. Correlated subgraph patterns are
                 different from frequent subgraphs due \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Buchnik:2020:FHT,
  author =       "Yehonatan Buchnik and Roy Friedman",
  title =        "{FireLedger}: a high throughput blockchain consensus
                 protocol",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1525--1539",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397246",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397246",
  abstract =     "Blockchains are distributed secure ledgers to which
                 transactions are issued continuously and each block of
                 transactions is tightly coupled to its predecessors.
                 Permissioned blockchains place special emphasis on
                 transactions throughput. In this paper we \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:PEF,
  author =       "Kefei Wang and Jian Liu and Feng Chen",
  title =        "Put an elephant into a fridge: optimizing cache
                 efficiency for in-memory key--value stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1540--1554",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397247",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397247",
  abstract =     "In today's data centers, memory-based key-value
                 systems, such as Memcached and Redis, play an
                 indispensable role in providing high-speed data
                 services. The rapidly growing capacity and quickly
                 falling price of DRAM memory in the past years have
                 enabled \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pedersen:2020:ASR,
  author =       "Simon Aagaard Pedersen and Bin Yang and Christian S.
                 Jensen",
  title =        "Anytime stochastic routing with hybrid learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1555--1567",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397248",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397248",
  abstract =     "Increasingly massive volumes of vehicle trajectory
                 data hold the potential to enable higher-resolution
                 traffic services than hitherto possible. We use
                 trajectory data to create a high-resolution, uncertain
                 road-network graph, where edges are associated
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2020:UED,
  author =       "Qizhen Zhang and Yifan Cai and Xinyi Chen and
                 Sebastian Angel and Ang Chen and Vincent Liu and Boon
                 Thau Loo",
  title =        "Understanding the effect of data center resource
                 disaggregation on production {DBMSs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1568--1581",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397249",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397249",
  abstract =     "Resource disaggregation is a new architecture for data
                 centers in which resources like memory and storage are
                 decoupled from the CPU, managed independently, and
                 connected through a high-speed network. Recent work has
                 shown that although disaggregated \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tziavelis:2020:OAR,
  author =       "Nikolaos Tziavelis and Deepak Ajwani and Wolfgang
                 Gatterbauer and Mirek Riedewald and Xiaofeng Yang",
  title =        "Optimal algorithms for ranked enumeration of answers
                 to full conjunctive queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1582--1597",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397250",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397250",
  abstract =     "We study ranked enumeration of join-query results
                 according to very general orders defined by selective
                 dioids. Our main contribution is a framework for ranked
                 enumeration over a class of dynamic programming
                 problems that generalizes seemingly different
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dhulipala:2020:SPS,
  author =       "Laxman Dhulipala and Charles McGuffey and Hongbo Kang
                 and Yan Gu and Guy E. Blelloch and Phillip B. Gibbons
                 and Julian Shun",
  title =        "{Sage}: parallel semi-asymmetric graph algorithms for
                 {NVRAMs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "9",
  pages =        "1598--1613",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3397230.3397251",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Jul 8 18:23:01 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/abs/10.14778/3397230.3397251",
  abstract =     "Non-volatile main memory (NVRAM) technologies provide
                 an attractive set of features for large-scale graph
                 analytics, including byte-addressability, low idle
                 power, and improved memory-density. NVRAM systems today
                 have an order of magnitude more NVRAM \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2020:PIN,
  author =       "Yuqing Zhu and Jing Tang and Xueyan Tang",
  title =        "Pricing influential nodes in online social networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1614--1627",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401961",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401961",
  abstract =     "Influential nodes with rich connections in online
                 social networks (OSNs) are of great values to initiate
                 marketing campaigns. However, the potential influence
                 spread that can be generated by these influential nodes
                 is hidden behind the structures of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2020:KSA,
  author =       "Bintao Sun and Maximilien Danisch and T-H. Hubert Chan
                 and Mauro Sozio",
  title =        "{KClist++}: a simple algorithm for finding $k$-clique
                 densest subgraphs in large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1628--1640",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401962",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401962",
  abstract =     "The problem of finding densest subgraphs has received
                 increasing attention in recent years finding
                 applications in biology, finance, as well as social
                 network analysis. The $k$-clique densest subgraph
                 problem is a generalization of the densest subgraph
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wellenzohn:2020:DIC,
  author =       "Kevin Wellenzohn and Michael H. B{\"o}hlen and Sven
                 Helmer",
  title =        "Dynamic interleaving of content and structure for
                 robust indexing of semi-structured hierarchical data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1641--1653",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401963",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401963",
  abstract =     "We propose a robust index for semi-structured
                 hierarchical data that supports content-and-structure
                 (CAS) queries specified by path and value predicates.
                 At the heart of our approach is a novel dynamic
                 interleaving scheme that merges the path and value
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Agarwal:2020:CGS,
  author =       "Shubhangi Agarwal and Sourav Dutta and Arnab
                 Bhattacharya",
  title =        "{ChiSeL}: graph similarity search using chi-squared
                 statistics in large probabilistic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1654--1668",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401964",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401964",
  abstract =     "Subgraph querying is one of the most important
                 primitives in many applications. Although the field is
                 well studied for deterministic graphs, in many
                 situations, the graphs are probabilistic in nature. In
                 this paper, we address the problem of subgraph
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tan:2020:FID,
  author =       "Zijing Tan and Ai Ran and Shuai Ma and Sheng Qin",
  title =        "Fast incremental discovery of pointwise order
                 dependencies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1669--1681",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401965",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401965",
  abstract =     "Pointwise order dependencies (PODs) are dependencies
                 that specify ordering semantics on attributes of
                 tuples. POD discovery refers to the process of
                 identifying the set $ \Sigma $ of valid and minimal
                 PODs on a given data set D. In practice D is typically
                 large \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Livshits:2020:ADC,
  author =       "Ester Livshits and Alireza Heidari and Ihab F. Ilyas
                 and Benny Kimelfeld",
  title =        "Approximate denial constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1682--1695",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401966",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401966",
  abstract =     "The problem of mining integrity constraints from data
                 has been extensively studied over the past two decades
                 for commonly used types of constraints, including the
                 classic Functional Dependencies (FDs) and the more
                 general Denial Constraints (DCs). In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rehrmann:2020:SOO,
  author =       "Robin Rehrmann and Carsten Binnig and Alexander
                 B{\"o}hm and Kihong Kim and Wolfgang Lehner",
  title =        "Sharing opportunities for {OLTP} workloads in
                 different isolation levels",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1696--1708",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401967",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401967",
  abstract =     "OLTP applications are usually executed by a high
                 number of clients in parallel and are typically faced
                 with high throughput demand as well as a constraint
                 latency requirement for individual statements.
                 Interestingly, OLTP workloads are often read-heavy
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Neumann:2020:BBM,
  author =       "Stefan Neumann and Pauli Miettinen",
  title =        "Biclustering and {Boolean} matrix factorization in
                 data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1709--1722",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401968",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401968",
  abstract =     "We study clustering of bipartite graphs and Boolean
                 matrix factorization in data streams. We consider a
                 streaming setting in which the vertices from the left
                 side of the graph arrive one by one together with all
                 of their incident edges. We provide an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jian:2020:EER,
  author =       "Xun Jian and Yue Wang and Lei Chen",
  title =        "Effective and efficient relational community detection
                 and search in large dynamic heterogeneous information
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1723--1736",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401969",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401969",
  abstract =     "Community search in heterogeneous information networks
                 (HINs) has attracted much attention in graph analysis.
                 Given a vertex, the goal is to find a densely-connected
                 sub-graph that contains the vertex. In practice, the
                 user may need to restrict the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kim:2020:NLS,
  author =       "Hyeonji Kim and Byeong-Hoon So and Wook-Shin Han and
                 Hongrae Lee",
  title =        "Natural language to {SQL}: where are we today?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1737--1750",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401970",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401970",
  abstract =     "Translating natural language to SQL (NL2SQL) has
                 received extensive attention lately, especially with
                 the recent success of deep learning technologies.
                 However, despite the large number of studies, we do not
                 have a thorough understanding of how good \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Che:2020:ATD,
  author =       "Yulin Che and Zhuohang Lai and Shixuan Sun and Yue
                 Wang and Qiong Luo",
  title =        "Accelerating truss decomposition on heterogeneous
                 processors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1751--1764",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401971",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401971",
  abstract =     "Truss decomposition is to divide a graph into a
                 hierarchy of subgraphs, or trusses. A subgraph is a
                 $k$-truss ($k \geq 2$) if each edge is in at least $k -
                 2$ triangles in the subgraph. Existing algorithms work
                 by first counting the number of triangles each
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mukherjee:2020:SDS,
  author =       "Rohan Mukherjee and Swarat Chaudhuri and Chris
                 Jermaine",
  title =        "Searching a database of source codes using
                 contextualized code search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1765--1778",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401972",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401972",
  abstract =     "Consider the case where a programmer has written some
                 part of a program, but has left part of the program
                 (such as a method or a function body) incomplete. The
                 goal is to use the context surrounding the missing code
                 to automatically ``figure out'' which \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:DSE,
  author =       "Yan Li and Tingjian Ge and Cindy Chen",
  title =        "Data stream event prediction based on timing knowledge
                 and state transitions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1779--1792",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401973",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401973",
  abstract =     "We study a practical problem of predicting the
                 upcoming events in data streams using a novel approach.
                 Treating event time orders as relationship types
                 between event entities, we build a dynamic knowledge
                 graph and use it to predict future event timing.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{McSherry:2020:SAP,
  author =       "Frank McSherry and Andrea Lattuada and Malte
                 Schwarzkopf and Timothy Roscoe",
  title =        "Shared arrangements: practical inter-query sharing for
                 streaming dataflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "10",
  pages =        "1793--1806",
  month =        jun,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3401960.3401974",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:36:56 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3401960.3401974",
  abstract =     "Current systems for data-parallel, incremental
                 processing and view maintenance over high-rate streams
                 isolate the execution of independent queries. This
                 creates unwanted redundancy and overhead in the
                 presence of concurrent incrementally maintained
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% [29-Oct-2022] TO DO: v13n11 is not yet published
@Article{Gupta:2020:SBD,
  author =       "Peeyush Gupta and Michael J. Carey and Sharad Mehrotra
                 and Roberto Yus",
  title =        "{SmartBench}: a benchmark for data management in smart
                 spaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1807--1820",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407791",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407791",
  abstract =     "This paper proposes SmartBench, a benchmark focusing
                 on queries resulting from (near) real-time applications
                 and longer-term analysis of IoT data. SmartBench,
                 derived from a deployed smart building monitoring
                 system, is comprised of: (1) An extensible \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Boniol:2020:SGB,
  author =       "Paul Boniol and Themis Palpanas",
  title =        "{Series2Graph}: graph-based subsequence anomaly
                 detection for time series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1821--1834",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407792",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407792",
  abstract =     "Subsequence anomaly detection in long sequences is an
                 important problem with applications in a wide range of
                 domains. However, the approaches that have been
                 proposed so far in the literature have severe
                 limitations: they either require prior domain
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2020:SCS,
  author =       "Dan Zhang and Madelon Hulsebos and Yoshihiko Suhara
                 and {\c{C}}agatay Demiralp and Jinfeng Li and
                 Wang-Chiew Tan",
  title =        "{Sato}: contextual semantic type detection in tables",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1835--1848",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407793",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407793",
  abstract =     "Detecting the semantic types of data columns in
                 relational tables is important for various data
                 preparation and information retrieval tasks such as
                 data cleaning, schema matching, data discovery, and
                 semantic search. However, existing detection \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{He:2020:TTP,
  author =       "Qijian He and Wei Yang and Bingren Chen and Yangyang
                 Geng and Liusheng Huang",
  title =        "{TransNet}: training privacy-preserving neural network
                 over transformed layer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1849--1862",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407794",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407794",
  abstract =     "The accuracy of neural network can be improved by
                 training over multi-participants' pooled dataset, but
                 privacy problem of sharing sensitive data obstructs
                 this collaborative learning. To solve this
                 contradiction, we propose TransNet, a novel solution
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2020:CAG,
  author =       "Wenfei Fan and Ruochun Jin and Muyang Liu and Ping Lu
                 and Chao Tian and Jingren Zhou",
  title =        "Capturing associations in graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1863--1876",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407795",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407795",
  abstract =     "This paper proposes a class of graph association
                 rules, denoted by GARs, to specify regularities between
                 entities in graphs. A GAR is a combination of a graph
                 pattern and a dependency; it may take as predicates ML
                 (machine learning) classifiers for link \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Renz-Wieland:2020:DPA,
  author =       "Alexander Renz-Wieland and Rainer Gemulla and Steffen
                 Zeuch and Volker Markl",
  title =        "Dynamic parameter allocation in parameter servers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1877--1890",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407796",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407796",
  abstract =     "To keep up with increasing dataset sizes and model
                 complexity, distributed training has become a necessity
                 for large machine learning tasks. Parameter servers
                 ease the implementation of distributed parameter
                 management---a key concern in distributed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Freitag:2020:AWC,
  author =       "Michael Freitag and Maximilian Bandle and Tobias
                 Schmidt and Alfons Kemper and Thomas Neumann",
  title =        "Adopting worst-case optimal joins in relational
                 database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1891--1904",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407797",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407797",
  abstract =     "Worst-case optimal join algorithms are attractive from
                 a theoretical point of view, as they offer
                 asymptotically better runtime than binary joins on
                 certain types of queries. In particular, they avoid
                 enumerating large intermediate results by processing
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{McKenna:2020:WAM,
  author =       "Ryan McKenna and Raj Kumar Maity and Arya Mazumdar and
                 Gerome Miklau",
  title =        "A workload-adaptive mechanism for linear queries under
                 local differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1905--1918",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407798",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407798",
  abstract =     "We propose a new mechanism to accurately answer a
                 user-provided set of linear counting queries under
                 local differential privacy (LDP). Given a set of linear
                 counting queries (the workload) our mechanism
                 automatically adapts to provide accuracy on the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:SSP,
  author =       "Yisu Remy Wang and Shana Hutchison and Jonathan Leang
                 and Bill Howe and Dan Suciu",
  title =        "{SPORES}: sum-product optimization via relational
                 equality saturation for large scale linear algebra",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1919--1932",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407799",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407799",
  abstract =     "Machine learning algorithms are commonly specified in
                 linear algebra (LA). LA expressions can be rewritten
                 into more efficient forms, by taking advantage of input
                 properties such as sparsity, as well as program
                 properties such as common subexpressions. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fernandez:2020:DMP,
  author =       "Raul Castro Fernandez and Pranav Subramaniam and
                 Michael J. Franklin",
  title =        "Data market platforms: trading data assets to solve
                 data problems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1933--1947",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407800",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407800",
  abstract =     "Data only generates value for a few organizations with
                 expertise and resources to make data shareable,
                 discoverable, and easy to integrate. Sharing data that
                 is easy to discover and integrate is hard because data
                 owners lack information (who needs what \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mahdavi:2020:BEE,
  author =       "Mohammad Mahdavi and Ziawasch Abedjan",
  title =        "{Baran}: effective error correction via a unified
                 context representation and transfer learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1948--1961",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407801",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407801",
  abstract =     "Traditional error correction solutions leverage
                 handmaid rules or master data to find the correct
                 values. Both are often amiss in real-world scenarios.
                 Therefore, it is desirable to additionally learn
                 corrections from a limited number of example \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2020:RDS,
  author =       "Ju Fan and Junyou Chen and Tongyu Liu and Yuwei Shen
                 and Guoliang Li and Xiaoyong Du",
  title =        "Relational data synthesis using generative adversarial
                 networks: a design space exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1962--1975",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407802",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407802",
  abstract =     "The proliferation of big data has brought an urgent
                 demand for privacy-preserving data publishing.
                 Traditional solutions to this demand have limitations
                 on effectively balancing the tradeoff between privacy
                 and utility of the released data. Thus, the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2020:LLP,
  author =       "Lei Yang and Hong Wu and Tieying Zhang and Xuntao
                 Cheng and Feifei Li and Lei Zou and Yujie Wang and
                 Rongyao Chen and Jianying Wang and Gui Huang",
  title =        "{Leaper}: a learned prefetcher for cache invalidation
                 in {LSM}-tree based storage engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1976--1989",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407803",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407803",
  abstract =     "Frequency-based cache replacement policies that work
                 well on page-based database storage engines are no
                  longer sufficient for the emerging LSM-tree
                  (Log-Structure Merge-tree) based storage engines. Due
                 to the append-only and copy-on-write techniques
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kang:2020:ASG,
  author =       "Daniel Kang and Edward Gan and Peter Bailis and
                 Tatsunori Hashimoto and Matei Zaharia",
  title =        "Approximate selection with guarantees using proxies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "1990--2003",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407804",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407804",
  abstract =     "Due to the falling costs of data acquisition and
                 storage, researchers and industry analysts often want
                 to find all instances of rare events in large datasets.
                 For instance, scientists can cheaply capture thousands
                 of hours of video, but are limited by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kang:2020:EIC,
  author =       "Minji Kang and Soyee Choi and Gihwan Oh and Sang-Won
                 Lee",
  title =        "{2R}: efficiently isolating cold pages in flash
                 storages",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2004--2017",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407805",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407805",
  abstract =     "Given skewed writes common in databases, the
                 conventional 1R-Greedy FTL incurs huge write
                 amplification, most of which is contributed by cold
                 pages amounting to 80\% of data. Since 1R-Greedy
                 manages all flash blocks in one region at no type
                 distinction, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bashardoost:2020:KT,
  author =       "Bahar Ghadiri Bashardoost and Ren{\'e}e J. Miller and
                 Kelly Lyons and Fatemeh Nargesian",
  title =        "Knowledge translation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2018--2032",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407806",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407806",
  abstract =     "We introduce Kensho, a tool for generating mapping
                 rules between two Knowledge Bases (KBs). To create the
                 mapping rules, Kensho starts with a set of
                 correspondences and enriches them with additional
                 semantic information automatically identified from the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Petersohn:2020:TSD,
  author =       "Devin Petersohn and Stephen Macke and Doris Xin and
                 William Ma and Doris Lee and Xiangxi Mo and Joseph E.
                 Gonzalez and Joseph M. Hellerstein and Anthony D.
                 Joseph and Aditya Parameswaran",
  title =        "Towards scalable dataframe systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2033--2046",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407807",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407807",
  abstract =     "Dataframes are a popular abstraction to represent,
                 prepare, and analyze data. Despite the remarkable
                 success of dataframe libraries in R and Python,
                 dataframes face performance issues even on moderately
                 large datasets. Moreover, there is significant
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2020:AFP,
  author =       "Yi Lu and Xiangyao Yu and Lei Cao and Samuel Madden",
  title =        "{Aria}: a fast and practical deterministic {OLTP}
                 database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2047--2060",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407808",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407808",
  abstract =     "Deterministic databases are able to efficiently run
                 transactions across different replicas without
                 coordination. However, existing state-of-the-art
                 deterministic databases require that transaction
                 read/write sets are known before execution, making such
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Miao:2020:COS,
  author =       "Dongjing Miao and Zhipeng Cai and Jianzhong Li and
                 Xiangyu Gao and Xianmin Liu",
  title =        "The computation of optimal subset repairs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2061--2074",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407809",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407809",
  abstract =     "Computing an optimal subset repair of an inconsistent
                 database is becoming a standalone research problem and
                 has a wide range of applications. However, it has not
                 been well-studied yet. A tight inapproximability bound
                 of the problem computing optimal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Christodoulakis:2020:PPB,
  author =       "Christina Christodoulakis and Eric B. Munson and Moshe
                 Gabel and Angela Demke Brown and Ren{\'e}e J. Miller",
  title =        "{Pytheas}: pattern-based table discovery in {CSV}
                 files",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2075--2089",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407810",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407810",
  abstract =     "CSV is a popular Open Data format widely used in a
                 variety of domains for its simplicity and effectiveness
                 in storing and disseminating data. Unfortunately, data
                 published in this format often does not conform to
                 strict specifications, making automated \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2020:PPV,
  author =       "Yuncheng Wu and Shaofeng Cai and Xiaokui Xiao and Gang
                 Chen and Beng Chin Ooi",
  title =        "Privacy preserving vertical federated learning for
                 tree-based models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2090--2103",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407811",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407811",
  abstract =     "Federated learning (FL) is an emerging paradigm that
                 enables multiple organizations to jointly train a model
                 without revealing their private data to each other.
                 This paper studies vertical federated learning, which
                 tackles the scenarios where (i) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Al-Baghdadi:2020:TBC,
  author =       "Ahmed Al-Baghdadi and Xiang Lian",
  title =        "Topic-based community search over spatial-social
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2104--2117",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407812",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407812",
  abstract =     "Recently, the community search problem has attracted
                 significant attention, due to its wide spectrum of
                 real-world applications such as event organization,
                 friend recommendation, advertisement in e-commence, and
                 so on. Given a query vertex, the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fritz:2020:LME,
  author =       "Manuel Fritz and Michael Behringer and Holger
                 Schwarz",
  title =        "{LOG-Means}: efficiently estimating the number of
                 clusters in large datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2118--2131",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407813",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407813",
  abstract =     "Clustering is a fundamental primitive in manifold
                 applications. In order to achieve valuable results,
                 parameters of the clustering algorithm, e.g., the
                 number of clusters, have to be set appropriately, which
                 is a tremendous pitfall. To this end, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Krastnikov:2020:EOD,
  author =       "Simeon Krastnikov and Florian Kerschbaum and Douglas
                 Stebila",
  title =        "Efficient oblivious database joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2132--2145",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407814",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407814",
  abstract =     "A major algorithmic challenge in designing
                 applications intended for secure remote execution is
                 ensuring that they are oblivious to their inputs, in
                 the sense that their memory access patterns do not leak
                 sensitive information to the server. This \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Issa:2020:ETQ,
  author =       "Ousmane Issa and Angela Bonifati and Farouk Toumani",
  title =        "Evaluating top-$k$ queries with inconsistency
                 degrees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2146--2158",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407815",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407815",
  abstract =     "We study the problem of augmenting relational tuples
                 with inconsistency awareness and tackling top-k queries
                 under a set of denial constraints (DCs). We define a
                 notion of inconsistent tuples with respect to a set of
                 DCs and define two measures of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Nakandala:2020:CDS,
  author =       "Supun Nakandala and Yuhao Zhang and Arun Kumar",
  title =        "{Cerebro}: a data system for optimized deep learning
                 model selection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2159--2173",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407816",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See errata \cite{Nakandala:2021:ECD}.",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407816",
  abstract =     "Deep neural networks (deep nets) are revolutionizing
                 many machine learning (ML) applications. But there is a
                 major bottleneck to wider adoption: the pain and
                 resource intensiveness of model selection. This
                 empirical process involves exploring deep net
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gan:2020:COP,
  author =       "Edward Gan and Peter Bailis and Moses Charikar",
  title =        "{CoopStore}: optimizing precomputed summaries for
                 aggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2174--2187",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407817",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407817",
  abstract =     "An emerging class of data systems partition their data
                 and precompute approximate summaries (i.e., sketches
                 and samples) for each segment to reduce query costs.
                 They can then aggregate and combine the segment
                 summaries to estimate results without \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Koide:2020:FSS,
  author =       "Satoshi Koide and Chuan Xiao and Yoshiharu Ishikawa",
  title =        "Fast subtrajectory similarity search in road networks
                 under weighted edit distance constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2188--2201",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407818",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407818",
  abstract =     "In this paper, we address a similarity search problem
                 for spatial trajectories in road networks. In
                 particular, we focus on the subtrajectory similarity
                 search problem, which involves finding in a database
                 the subtrajectories similar to a query \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2020:SAG,
  author =       "Yu Liu and Lei Zou and Qian Ge and Zhewei Wei",
  title =        "{SimTab}: accuracy-guaranteed {SimRank} queries
                 through tighter confidence bounds and multi-armed
                 bandits",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2202--2214",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407819",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407819",
  abstract =     "SimRank is a classic measure of vertex-pair similarity
                 according to the structure of graphs. Top-$k$ and
                 thresholding SimRank queries are two important types of
                 similarity search with numerous applications in web
                 mining, social network analysis, spam \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dutt:2020:EAS,
  author =       "Anshuman Dutt and Chi Wang and Vivek Narasayya and
                 Surajit Chaudhuri",
  title =        "Efficiently approximating selectivity functions using
                 low overhead regression models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2215--2228",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407820",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407820",
  abstract =     "Today's query optimizers use fast selectivity
                 estimation techniques but are known to be susceptible
                 to large estimation errors. Recent work on supervised
                 learned models for selectivity estimation significantly
                 improves accuracy while ensuring relatively \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2020:IID,
  author =       "Yin Lin and Yifan Guan and Abolfazl Asudeh and H. V.
                 Jagadish",
  title =        "Identifying insufficient data coverage in databases
                 with multiple relations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2229--2242",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407821",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407821",
  abstract =     "In today's data-driven world, it is critical that we
                 use appropriate datasets for analysis and
                 decision-making. Datasets could be biased because they
                 reflect existing inequalities in the world, due to the
                 data scientists' biased world view, or due to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:CMA,
  author =       "Lingxiao Li and Muhammad Aamir Cheema and Mohammed
                 Eunus Ali and Hua Lu and David Taniar",
  title =        "Continuously monitoring alternative shortest paths on
                 road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2243--2255",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407822",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407822",
  abstract =     "Modern navigation systems do not only provide shortest
                 paths but also some alternative paths to provide more
                 options to the users. This paper is the first to study
                 the problem of continuously reporting alternative paths
                 for a user traveling along a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2020:HMC,
  author =       "Geon Lee and Jihoon Ko and Kijung Shin",
  title =        "Hypergraph motifs: concepts, algorithms, and
                 discoveries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2256--2269",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407823",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407823",
  abstract =     "Hypergraphs naturally represent group interactions,
                 which are omnipresent in many domains: collaborations
                 of researchers, co-purchases of items, joint
                 interactions of proteins, to name a few. In this work,
                 we propose tools for answering the following \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Birnick:2020:HSE,
  author =       "Johann Birnick and Thomas Bl{\"a}sius and Tobias
                 Friedrich and Felix Naumann and Thorsten Papenbrock and
                 Martin Schirneck",
  title =        "Hitting set enumeration with partial information for
                 unique column combination discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2270--2283",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407824",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407824",
  abstract =     "Unique column combinations (UCCs) are a fundamental
                 concept in relational databases. They identify entities
                 in the data and support various data management
                  activities. Still, UCCs are usually not explicitly
                  defined and need to be discovered. State-of-the-art
                  \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2020:SDS,
  author =       "Yue Chen and Zhida Chen and Gao Cong and Ahmed R.
                 Mahmood and Walid G. Aref",
  title =        "{SSTD}: a distributed system on streaming
                 spatio-textual data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2284--2296",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407825",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407825",
  abstract =     "Streaming spatio-textual data that contains
                 geolocations and textual contents, e.g., geo-tagged
                 tweets, is becoming increasingly available. Users can
                 register continuous queries to receive up-to-date
                 results continuously, or pose snapshot queries to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mohammed:2020:CPI,
  author =       "Haneen Mohammed and Ziyun Wei and Eugene Wu and Ravi
                 Netravali",
  title =        "Continuous prefetch for interactive data
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2297--2311",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407826",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407826",
  abstract =     "Interactive data visualization and exploration (DVE)
                 applications are often network-bottlenecked due to
                 bursty request patterns, large response sizes, and
                 heterogeneous deployments over a range of networks and
                 devices. This makes it difficult to ensure \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:EES,
  author =       "Zheng Wang and Cheng Long and Gao Cong and Yiding
                 Liu",
  title =        "Efficient and effective similar subtrajectory search
                 with deep reinforcement learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2312--2325",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407827",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407827",
  abstract =     "Similar trajectory search is a fundamental problem and
                 has been well studied over the past two decades.
                 However, the similar subtrajectory search (SimSub)
                 problem, aiming to return a portion of a trajectory
                 (i.e., a subtrajectory), which is the most \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2020:BSE,
  author =       "Zequn Sun and Qingheng Zhang and Wei Hu and Chengming
                 Wang and Muhao Chen and Farahnaz Akrami and Chengkai
                 Li",
  title =        "A benchmarking study of embedding-based entity
                 alignment for knowledge graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2326--2340",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407828",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407828",
  abstract =     "Entity alignment seeks to find entities in different
                 knowledge graphs (KGs) that refer to the same
                 real-world object. Recent advancement in KG embedding
                 impels the advent of embedding-based entity alignment,
                 which encodes entities in a continuous \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Qi:2020:ELS,
  author =       "Jianzhong Qi and Guanli Liu and Christian S. Jensen
                 and Lars Kulik",
  title =        "Effectively learning spatial indices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2341--2354",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407829",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407829",
  abstract =     "Machine learning, especially deep learning, is used
                 increasingly to enable better solutions for data
                 management tasks previously solved by other means,
                 including database indexing. A recent study shows that
                 a neural network can not only learn to predict
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2020:SLB,
  author =       "Qiyu Liu and Libin Zheng and Yanyan Shen and Lei
                 Chen",
  title =        "Stable learned bloom filters for data streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2355--2367",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407830",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407830",
  abstract =     "Bloom filter and its variants are elegant
                 space-efficient probabilistic data structures for
                 approximate set membership queries. It has been
                  recently shown that the space cost of Bloom filters can
                  be significantly reduced via a combination with
                  pre-trained \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jin:2020:ATL,
  author =       "Zhongjun Jin and Yeye He and Surajit Chaudhuri",
  title =        "Auto-transform: learning-to-transform by patterns",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2368--2381",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407831",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407831",
  abstract =     "Data Transformation is a long-standing problem in data
                 management. Recent work adopts a
                 ``transform-by-example'' (TBE) paradigm to infer
                 transformation programs based on user-provided
                 input/output examples, which greatly improves
                 usability, and brought \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kossmann:2020:MMM,
  author =       "Jan Kossmann and Stefan Halfpap and Marcel Jankrift
                 and Rainer Schlosser",
  title =        "Magic mirror in my hand, which is the best in the
                 land?: an experimental evaluation of index selection
                 algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2382--2395",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407832",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407832",
  abstract =     "Indexes are essential for the efficient processing of
                 database workloads. Proposed solutions for the relevant
                 and challenging index selection problem range from
                 metadata-based simple heuristics, over sophisticated
                 multi-step algorithms, to approaches \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Damme:2020:MAQ,
  author =       "Patrick Damme and Annett Ungeth{\"u}m and Johannes
                 Pietrzyk and Alexander Krause and Dirk Habich and
                 Wolfgang Lehner",
  title =        "{MorphStore}: analytical query engine with a holistic
                 compression-enabled processing model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2396--2410",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407833",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407833",
  abstract =     "In this paper, we present MorphStore, an open-source
                 in-memory columnar analytical query engine with a novel
                 holistic compression-enabled processing model.
                 Basically, compression using lightweight integer
                 compression algorithms already plays an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Parchas:2020:FED,
  author =       "Panos Parchas and Yonatan Naamad and Peter {Van
                 Bouwel} and Christos Faloutsos and Michalis
                 Petropoulos",
  title =        "Fast and effective distribution-key recommendation for
                 {Amazon Redshift}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2411--2423",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407834",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407834",
  abstract =     "How should we split data among the nodes of a
                 distributed data warehouse in order to boost
                 performance for a forecasted workload? In this paper,
                 we study the effect of different data partitioning
                 schemes on the overall network cost of pairwise joins.
                 We \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pappachan:2020:SMA,
  author =       "Primal Pappachan and Roberto Yus and Sharad Mehrotra
                 and Johann-Christoph Freytag",
  title =        "{Sieve}: a middleware approach to scalable access
                 control for database management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2424--2437",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407835",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407835",
  abstract =     "Current approaches for enforcing Fine Grained Access
                 Control (FGAC) in DBMS do not scale to scenarios when
                 the number of access control policies are in the order
                 of thousands. This paper identifies such a use case in
                 the context of emerging smart spaces \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sreekanti:2020:CSF,
  author =       "Vikram Sreekanti and Chenggang Wu and Xiayue Charles
                 Lin and Johann Schleier-Smith and Joseph E. Gonzalez
                 and Joseph M. Hellerstein and Alexey Tumanov",
  title =        "{Cloudburst}: stateful functions-as-a-service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2438--2452",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407836",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407836",
  abstract =     "Function-as-a-Service (FaaS) platforms and
                 ``serverless'' cloud computing are becoming
                 increasingly popular due to ease-of-use and operational
                 simplicity. Current FaaS offerings are targeted at
                 stateless functions that do minimal I/O and
                 communication. We \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Suprem:2020:OAD,
  author =       "Abhijit Suprem and Joy Arulraj and Calton Pu and Joao
                 Ferreira",
  title =        "{ODIN}: automated drift detection and recovery in
                 video analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2453--2465",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407837",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407837",
  abstract =     "Recent advances in computer vision have led to a
                 resurgence of interest in visual data analytics.
                 Researchers are developing systems for effectively and
                 efficiently analyzing visual data at scale. A
                 significant challenge that these systems encounter
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Piao:2020:MRA,
  author =       "Chengzhi Piao and Weiguo Zheng and Yu Rong and Hong
                 Cheng",
  title =        "Maximizing the reduction ability for near-maximum
                 independent set computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2466--2478",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407838",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407838",
  abstract =     "Finding the maximum independent set is a fundamental
                 NP-hard problem in graph theory. Recent studies have
                 paid much attention to designing efficient algorithms
                 that find a maximal independent set of good quality
                 (the more vertices the better). \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2020:FTA,
  author =       "Zhao Chen and Peng Cheng and Lei Chen and Xuemin Lin
                 and Cyrus Shahabi",
  title =        "Fair task assignment in spatial crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2479--2492",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407839",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407839",
  abstract =     "With the pervasiveness of mobile devices, wireless
                 broadband and sharing economy, spatial crowdsourcing is
                 becoming part of our daily life. Existing studies on
                 spatial crowdsourcing usually focus on enhancing the
                 platform interests and customer \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2020:DSC,
  author =       "Hao Zhang and Jeffrey Xu Yu and Yikai Zhang and
                 Kangfei Zhao and Hong Cheng",
  title =        "Distributed subgraph counting: a general approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2493--2507",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407840",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407840",
  abstract =     "In this paper, we study local subgraph counting, which
                  is to count the occurrences of a user-given pattern
                  graph $p$ around every node $v$ in a data graph $G$,
                  when $v$ matches to a given orbit $o$ in $p$, where
                  the orbit serves as a center to count $p$. In general,
                  \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Karagiannis:2020:SMI,
  author =       "Georgios Karagiannis and Mohammed Saeed and Paolo
                 Papotti and Immanuel Trummer",
  title =        "{Scrutinizer}: a mixed-initiative approach to
                 large-scale, data-driven claim verification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2508--2521",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407841",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407841",
  abstract =     "Organizations spend significant amounts of time and
                 money to manually fact check text documents summarizing
                 data. The goal of the Scrutinizer system is to reduce
                 verification overheads by supporting human fact
                 checkers in translating text claims into \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Krivosheev:2020:DPC,
  author =       "Evgeny Krivosheev and Siarhei Bykau and Fabio Casati
                 and Sunil Prabhakar",
  title =        "Detecting and preventing confused labels in
                 crowdsourced data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2522--2535",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407842",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407842",
  abstract =     "Crowdsourcing is a challenging activity for many
                 reasons, from task design to workers' training,
                 identification of low-quality annotators, and many
                 more. A particularly subtle form of error is due to
                 confusion of observations, that is, crowd workers
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:OHC,
  author =       "Rong-Hua Li and Sen Gao and Lu Qin and Guoren Wang and
                 Weihua Yang and Jeffrey Xu Yu",
  title =        "Ordering heuristics for $k$-clique listing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2536--2548",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407843",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407843",
  abstract =     "Listing all $k$-cliques in a graph is a fundamental
                  graph mining problem that finds many important
                  applications in community detection and social network
                  analysis. Unfortunately, the problem of $k$-clique
                  listing is often deemed infeasible for a large $k$, as
                  \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:DSM,
  author =       "Jinfeng Li and Yuliang Li and Xiaolan Wang and
                 Wang-Chiew Tan",
  title =        "Deep or simple models for semantic tagging?: it
                 depends on your data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2549--2562",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407844",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407844",
  abstract =     "Semantic tagging, which has extensive applications in
                 text mining, predicts whether a given piece of text
                 conveys the meaning of a given semantic tag. The
                 problem of semantic tagging is largely solved with
                 supervised learning and today, deep learning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bilal:2020:DBC,
  author =       "Muhammad Bilal and Marco Serafini and Marco Canini and
                 Rodrigo Rodrigues",
  title =        "Do the best cloud configurations grow on trees?: an
                 experimental evaluation of black box algorithms for
                 optimizing cloud workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2563--2575",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407845",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407845",
  abstract =     "Cloud configuration optimization is the procedure to
                 determine the number and the type of instances to use
                 when deploying an application in cloud environments,
                 given a cost or performance objective. In the absence
                 of a performance model for the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2020:FLD,
  author =       "Alexander Zhou and Yue Wang and Lei Chen",
  title =        "Finding large diverse communities on networks: the
                 edge maximum $ k*$-partite clique",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2576--2589",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407846",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407846",
  abstract =     "In this work we examine the problem of finding large,
                 diverse communities on graphs where the users are
                 separated into distinct groups. More specifically, this
                 work considers diversity to be the inclusion of users
                 from multiple groups as opposed to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{vanderLinde:2020:PCS,
  author =       "Albert van der Linde and Jo{\~a}o Leit{\~a}o and Nuno
                 Pregui{\c{c}}a",
  title =        "Practical client-side replication: weak consistency
                 semantics for insecure settings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2590--2605",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407847",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407847",
  abstract =     "Client-side replication and direct client-to-client
                 synchronization can be used to create highly available,
                 low-latency interactive applications. Causal
                 consistency, the strongest available consistency model
                 under network partitions, is an attractive \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rong:2020:APS,
  author =       "Kexin Rong and Yao Lu and Peter Bailis and Srikanth
                 Kandula and Philip Levis",
  title =        "Approximate partition selection for big-data workloads
                 using summary statistics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2606--2619",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407848",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407848",
  abstract =     "Many big-data clusters store data in large partitions
                 that support access at a coarse, partition-level
                 granularity. As a result, approximate query processing
                 via row-level sampling is inefficient, often requiring
                 reads of many partitions. In this work, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Winter:2020:MMH,
  author =       "Christian Winter and Tobias Schmidt and Thomas Neumann
                 and Alfons Kemper",
  title =        "Meet me halfway: split maintenance of continuous
                 views",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2620--2633",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407849",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407849",
  abstract =     "From Industry 4.0-driven factories to real-time
                 trading algorithms, businesses depend on analytics on
                 high-velocity real-time data. Often these analytics are
                 performed not in dedicated stream processing engines
                 but on views within a general-purpose \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2020:UPB,
  author =       "Youmin Chen and Youyou Lu and Kedong Fang and Qing
                 Wang and Jiwu Shu",
  title =        "{uTree}: a persistent {B+-tree} with low tail
                 latency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2634--2648",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407850",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407850",
  abstract =     "Tail latency is a critical design issue in recent
                 storage systems. B$^+$-tree, as a fundamental building
                 block in storage systems, incurs high tail latency,
                 especially when placed in persistent memory (PM). Our
                 empirical study specifies two factors that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Boncz:2020:FFR,
  author =       "Peter Boncz and Thomas Neumann and Viktor Leis",
  title =        "{FSST}: fast random access string compression",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2649--2661",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407851",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407851",
  abstract =     "Strings are prevalent in real-world data sets. They
                 often occupy a large fraction of the data and are slow
                 to process. In this work, we present Fast Static Symbol
                 Table (FSST), a lightweight compression scheme for
                 strings. On text data, FSST offers \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vogel:2020:MBC,
  author =       "Lukas Vogel and Viktor Leis and Alexander van Renen
                 and Thomas Neumann and Satoshi Imamura and Alfons
                 Kemper",
  title =        "{Mosaic}: a budget-conscious storage engine for
                 relational database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2662--2675",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407852",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407852",
  abstract =     "Relational database systems are purpose-built for a
                 specific storage device class (e.g., HDD, SSD, or
                 DRAM). They do not cope well with the multitude of
                 storage devices that are competitive at their price
                 `sweet spots'. To make use of different storage
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Blanusa:2020:MCE,
  author =       "Jovan Blanusa and Radu Stoica and Paolo Ienne and
                 Kubilay Atasu",
  title =        "Manycore clique enumeration with fast set
                 intersections",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2676--2690",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407853",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407853",
  abstract =     "Listing all maximal cliques of a given graph has
                 important applications in the analysis of social and
                 biological networks. Parallelisation of maximal clique
                 enumeration (MCE) algorithms on modern manycore
                 processors is challenging due to the task-level
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bater:2020:SPP,
  author =       "Johes Bater and Yongjoo Park and Xi He and Xiao Wang
                 and Jennie Rogers",
  title =        "{SAQE}: practical privacy-preserving approximate query
                 processing for data federations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2691--2705",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407854",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407854",
  abstract =     "A private data federation enables clients to query the
                 union of data from multiple data providers without
                 revealing any extra private information to the client
                 or any other data providers. Unfortunately, this strong
                 end-to-end privacy guarantee requires \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kuhlman:2020:RAA,
  author =       "Caitlin Kuhlman and Elke Rundensteiner",
  title =        "Rank aggregation algorithms for fair consensus",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2706--2719",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407855",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407855",
  abstract =     "Aggregating multiple rankings in a database is an
                 important task well studied by the database community.
                 High-stakes application domains include hiring,
                 lending, and education where multiple decision makers
                 rank candidates and their input is then \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Glasbergen:2020:SUA,
  author =       "Brad Glasbergen and Michael Abebe and Khuzaima Daudjee
                 and Amit Levi",
  title =        "{Sentinel}: universal analysis and insight for data
                 systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2720--2733",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407856",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407856",
  abstract =     "Systems continue to grow in complexity in response to
                 the need to support vast quantities of data and a wide
                 variety of workloads. Small changes in workloads and
                 system configuration can result in significantly
                 different system behaviour and performance \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fang:2020:ODC,
  author =       "Jingzhi Fang and Yanyan Shen and Yue Wang and Lei
                 Chen",
  title =        "Optimizing {DNN} computation graph using graph
                 substitutions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2734--2746",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407857",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407857",
  abstract =     "Deep learning has achieved great success in various
                 real-world applications. As deep neural networks (DNNs)
                 are getting larger, the inference and training cost of
                 DNNs increases significantly. Since one round of
                 inference or one iteration in the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sen:2020:ANL,
  author =       "Jaydeep Sen and Chuan Lei and Abdul Quamar and Fatma
                 {\"O}zcan and Vasilis Efthymiou and Ayushi Dalmia and
                 Greg Stager and Ashish Mittal and Diptikalyan Saha and
                 Karthik Sankaranarayanan",
  title =        "{ATHENA++}: natural language querying for complex
                 nested {SQL} queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2747--2759",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407858",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407858",
  abstract =     "Natural Language Interfaces to Databases (NLIDB)
                 systems eliminate the requirement for an end user to
                 use complex query languages like SQL, by translating
                 the input natural language (NL) queries to SQL
                 automatically. Although a significant volume of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xu:2020:CAD,
  author =       "Min Xu and Bolin Ding and Tianhao Wang and Jingren
                 Zhou",
  title =        "Collecting and analyzing data jointly from multiple
                 services under local differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2760--2772",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407859",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407859",
  abstract =     "Users' sensitive data can be collected and analyzed
                 under local differential privacy (LDP) without the need
                 to trust the data collector. Most previous work on LDP
                 can be applied when each user's data is generated and
                 collected from a single service or \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gan:2020:IDA,
  author =       "Yifan Gan and Xueyuan Ren and Drew Ripberger and
                 Spyros Blanas and Yang Wang",
  title =        "{IsoDiff}: debugging anomalies caused by weak
                 isolation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2773--2786",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407860",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407860",
  abstract =     "Weak isolation levels, such as Read Committed and
                 Snapshot Isolation, are widely used by databases for
                 their higher concurrency, but may introduce subtle
                 correctness errors in applications that only experts
                 can identify. This paper proposes IsoDiff, a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Barsky:2020:SRN,
  author =       "Marina Barsky and Jonathan Gabor and Mariano P.
                 Consens and Alex Thomo",
  title =        "Suffix rank: a new scalable algorithm for indexing
                 large string collections",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "12",
  pages =        "2787--2800",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3407790.3407861",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:33:57 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3407790.3407861",
  abstract =     "We investigate the problem of building a suffix array
                 substring index for inputs significantly larger than
                 main memory. This problem is especially important in
                 the context of biological sequence analysis, where
                 biological polymers can be thought of as \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zeng:2020:SBI,
  author =       "Yuxiang Zeng and Yongxin Tong and Yuguang Song and Lei
                 Chen",
  title =        "The simpler the better: an indexing approach for
                 shared-route planning queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "13",
  pages =        "3517--3530",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3424573.3424574",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3424573.3424574",
  abstract =     "Ridesharing services have gained global popularity as
                 a convenient, economic, and sustainable transportation
                 mode in recent years. One fundamental challenge in
                  these services is planning the shared-routes (i.e.,
                 sequences of origins and destinations) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tanabe:2020:ACC,
  author =       "Takayuki Tanabe and Takashi Hoshino and Hideyuki
                 Kawashima and Osamu Tatebe",
  title =        "An analysis of concurrency control protocols for
                 in-memory databases with {CCBench}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "13",
  pages =        "3531--3544",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3424573.3424575",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3424573.3424575",
  abstract =     "This paper presents yet another concurrency control
                 analysis platform, CCBench. CCBench supports seven
                 protocols (Silo, TicToc, MOCC, Cicada, SI, SI with
                 latch-free SSN, 2PL) and seven versatile optimization
                 methods and enables the configuration of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:IUS,
  author =       "Tianhao Wang and Bolin Ding and Min Xu and Zhicong
                 Huang and Cheng Hong and Jingren Zhou and Ninghui Li
                 and Somesh Jha",
  title =        "Improving utility and security of the shuffler-based
                 differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "13",
  pages =        "3545--3558",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3424573.3424576",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3424573.3424576",
  abstract =     "When collecting information, local differential
                 privacy (LDP) alleviates privacy concerns of users
                 because their private information is randomized before
                  being sent to the central aggregator. LDP imposes
                 large amount of noise as each user executes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kipf:2020:CIL,
  author =       "Andreas Kipf and Damian Chromejko and Alexander Hall
                 and Peter Boncz and David G. Andersen",
  title =        "Cuckoo index: a lightweight secondary index
                 structure",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "13",
  pages =        "3559--3572",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3424573.3424577",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3424573.3424577",
  abstract =     "In modern data warehousing, data skipping is essential
                 for high query performance. While index structures such
                 as B-trees or hash tables allow for precise pruning,
                 their large storage requirements make them impractical
                 for indexing secondary columns. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Abebe:2020:MAP,
  author =       "Michael Abebe and Brad Glasbergen and Khuzaima
                 Daudjee",
  title =        "{MorphoSys}: automatic physical design metamorphosis
                 for distributed database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "13",
  pages =        "3573--3587",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3424573.3424578",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3424573.3424578",
  abstract =     "Distributed database systems are widely used to meet
                 the demands of storing and managing computation-heavy
                 workloads. To boost performance and minimize resource
                 and data contention, these systems require selecting a
                 distributed physical design that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Behnezhad:2020:PGA,
  author =       "Soheil Behnezhad and Laxman Dhulipala and Hossein
                 Esfandiari and Jakub Lacki and Vahab Mirrokni and
                 Warren Schudy",
  title =        "Parallel graph algorithms in constant adaptive rounds:
                 theory meets practice",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "13",
  pages =        "3588--3602",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3424573.3424579",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3424573.3424579",
  abstract =     "We study fundamental graph problems such as graph
                 connectivity, minimum spanning forest (MSF), and
                 approximate maximum (weight) matching in a distributed
                 setting. In particular, we focus on the Adaptive
                 Massively Parallel Computation (AMPC) model, which
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:DLP,
  author =       "Runhui Wang and Dong Deng",
  title =        "{DeltaPQ}: lossless product quantization code
                 compression for high dimensional similarity search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "13",
  number =       "13",
  pages =        "3603--3616",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3424573.3424580",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3424573.3424580",
  abstract =     "High dimensional data is ubiquitous and plays an
                 important role in many applications. However, the size
                 of high dimensional data is usually excessively large.
                 To alleviate this problem, in this paper, we develop
                 novel techniques to compress and search \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Marcus:2020:BLI,
  author =       "Ryan Marcus and Andreas Kipf and Alexander van Renen
                 and Mihail Stoian and Sanchit Misra and Alfons Kemper
                 and Thomas Neumann and Tim Kraska",
  title =        "Benchmarking learned indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "1",
  pages =        "1--13",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3421424.3421425",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3421424.3421425",
  abstract =     "Recent advancements in learned index structures
                 propose replacing existing index structures, like
                 B-Trees, with approximate learned models. In this work,
                 we present a unified benchmark that compares well-tuned
                 implementations of three learned index \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:TGC,
  author =       "Zuozhi Wang and Kai Zeng and Botong Huang and Wei Chen
                 and Xiaozong Cui and Bo Wang and Ji Liu and Liya Fan
                 and Dachuan Qu and Zhenyu Hou and Tao Guan and Chen Li
                 and Jingren Zhou",
  title =        "{Tempura}: a general cost-based optimizer framework for
                 incremental data processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "1",
  pages =        "14--27",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3421424.3421427",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3421424.3421427",
  abstract =     "Incremental processing is widely-adopted in many
                 applications, ranging from incremental view
                 maintenance, stream computing, to recently emerging
                 progressive data warehouse and intermittent query
                 processing. Despite many algorithms developed on this
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Heo:2020:IGD,
  author =       "Geon Heo and Yuji Roh and Seonghyeon Hwang and Dayun
                 Lee and Steven Euijong Whang",
  title =        "{Inspector Gadget}: a data programming-based labeling
                 system for industrial images",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "1",
  pages =        "28--36",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3421424.3421429",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3421424.3421429",
  abstract =     "As machine learning for images becomes democratized in
                 the Software 2.0 era, one of the serious bottlenecks is
                 securing enough labeled data for training. This problem
                 is especially critical in a manufacturing setting where
                 smart factories rely on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2020:SAN,
  author =       "Renchi Yang and Jieming Shi and Xiaokui Xiao and Yin
                 Yang and Juncheng Liu and Sourav S. Bhowmick",
  title =        "Scaling attributed network embedding to massive
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "1",
  pages =        "37--49",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3421424.3421430",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3421424.3421430",
  abstract =     "Given a graph G where each node is associated with a
                 set of attributes, attributed network embedding (ANE)
                 maps each node $ v \in G $ to a compact vector X$_v$,
                 which can be used in downstream machine learning tasks.
                 Ideally, $ X_v$ should capture node $v$'s affinity.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:DEM,
  author =       "Yuliang Li and Jinfeng Li and Yoshihiko Suhara and
                 AnHai Doan and Wang-Chiew Tan",
  title =        "Deep entity matching with pre-trained language
                 models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "1",
  pages =        "50--60",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3421424.3421431",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3421424.3421431",
  abstract =     "We present Ditto, a novel entity matching system based
                 on pre-trained Transformer-based language models. We
                 fine-tune and cast EM as a sequence-pair classification
                 problem to leverage such models with a simple
                 architecture. Our experiments show that a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2020:NOC,
  author =       "Zongheng Yang and Amog Kamsetty and Sifei Luan and
                 Eric Liang and Yan Duan and Xi Chen and Ion Stoica",
  title =        "{NeuroCard}: one cardinality estimator for all
                 tables",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "1",
  pages =        "61--73",
  month =        sep,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3421424.3421432",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:02 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3421424.3421432",
  abstract =     "Query optimizers rely on accurate cardinality
                 estimates to produce good execution plans. Despite
                 decades of research, existing cardinality estimators
                 are inaccurate for complex queries, due to making lossy
                 modeling assumptions and not capturing inter-.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ding:2020:TLM,
  author =       "Jialin Ding and Vikram Nathan and Mohammad Alizadeh
                 and Tim Kraska",
  title =        "{Tsunami}: a learned multi-dimensional index for
                 correlated data and skewed workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "74--86",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425880",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425880",
  abstract =     "Filtering data based on predicates is one of the most
                 fundamental operations for any modern data warehouse.
                 Techniques to accelerate the execution of filter
                 expressions include clustered indexes, specialized sort
                 orders (e.g., Z-order), multi-. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kang:2020:JOP,
  author =       "Daniel Kang and Ankit Mathur and Teja Veeramacheneni
                 and Peter Bailis and Matei Zaharia",
  title =        "Jointly optimizing preprocessing and inference for
                 {DNN}-based visual analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "87--100",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425881",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425881",
  abstract =     "While deep neural networks (DNNs) are an increasingly
                 popular way to query large corpora of data, their
                 significant runtime remains an active area of research.
                 As a result, researchers have proposed systems and
                 optimizations to reduce these costs by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Menon:2020:PCQ,
  author =       "Prashanth Menon and Amadou Ngom and Lin Ma and Todd C.
                 Mowry and Andrew Pavlo",
  title =        "Permutable compiled queries: dynamically adapting
                 compiled queries without recompiling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "101--113",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425882",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425882",
  abstract =     "Just-in-time (JIT) query compilation is a technique to
                 improve analytical query performance in database
                 management systems (DBMSs). But the cost of compiling
                 each query can be significant relative to its execution
                 time. This overhead prohibits the DBMS \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Min:2020:EEM,
  author =       "Seung Won Min and Vikram Sharma Mailthody and Zaid
                 Qureshi and Jinjun Xiong and Eiman Ebrahimi and Wen-mei
                 Hwu",
  title =        "{EMOGI}: efficient memory-access for out-of-memory
                 graph-traversal in {GPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "114--127",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425883",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425883",
  abstract =     "Modern analytics and recommendation systems are
                 increasingly based on graph data that capture the
                 relations between entities being analyzed. Practical
                 graphs come in huge sizes, offer massive parallelism,
                 and are stored in sparse-matrix formats such as
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2020:SFA,
  author =       "Yinda Zhang and Jinyang Li and Yutian Lei and Tong
                 Yang and Zhetao Li and Gong Zhang and Bin Cui",
  title =        "On-off sketch: a fast and accurate sketch on
                 persistence",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "128--140",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425884",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425884",
  abstract =     "Approximate stream processing has attracted much
                 attention recently. Prior art mostly focuses on
                 characteristics like frequency, cardinality, and
                 quantile. Persistence, as a new characteristic, is
                 getting increasing attention. Unlike frequency,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tran:2020:RTD,
  author =       "Luan Tran and Min Y. Mun and Cyrus Shahabi",
  title =        "Real-time distance-based outlier detection in data
                 streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "141--153",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425885",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425885",
  abstract =     "Real-time outlier detection in data streams has drawn
                 much attention recently as many applications need to be
                 able to detect abnormal behaviors as soon as they
                 occur. The arrival and departure of streaming data on
                 edge devices impose new challenges to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Poppe:2020:SIL,
  author =       "Olga Poppe and Tayo Amuneke and Dalitso Banda and
                 Aritra De and Ari Green and Manon Knoertzer and Ehi
                 Nosakhare and Karthik Rajendran and Deepak Shankargouda
                 and Meina Wang and Alan Au and Carlo Curino and Qun Guo
                 and Alekh Jindal and Ajay Kalhan and Morgan Oslake and
                 Sonia Parchani and Vijay Ramani and Raj Sellappan and
                 Saikat Sen and Sheetal Shrotri and Soundararajan
                 Srinivasan and Ping Xia and Shize Xu and Alicia Yang
                 and Yiwen Zhu",
  title =        "{Seagull}: an infrastructure for load prediction and
                 optimized resource allocation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "154--162",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425886",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425886",
  abstract =     "Microsoft Azure is dedicated to guarantee high quality
                 of service to its customers, in particular, during
                 periods of high customer activity, while controlling
                 cost. We employ a Data Science (DS) driven solution to
                 predict user load and leverage these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:EKM,
  author =       "Sheng Wang and Yuan Sun and Zhifeng Bao",
  title =        "On the efficiency of {K-means} clustering: evaluation,
                 optimization, and algorithm selection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "163--175",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425887",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425887",
  abstract =     "This paper presents a thorough evaluation of the
                 existing methods that accelerate Lloyd's algorithm for
                 fast k -means clustering. To do so, we analyze the
                 pruning mechanisms of existing methods, and summarize
                 their common pipeline into a unified \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2020:RHA,
  author =       "Shixuan Sun and Xibo Sun and Yulin Che and Qiong Luo
                 and Bingsheng He",
  title =        "{RapidMatch}: a holistic approach to subgraph query
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "176--188",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425888",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425888",
  abstract =     "A subgraph query searches for all embeddings in a data
                 graph that are identical to a query graph. Two kinds of
                 algorithms, either graph exploration based or join
                 based, have been developed for processing subgraph
                 queries. Due to algorithmic and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xia:2020:TLP,
  author =       "Yu Xia and Xiangyao Yu and Andrew Pavlo and Srinivas
                 Devadas",
  title =        "{Taurus}: lightweight parallel logging for in-memory
                 database management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "189--201",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425889",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425889",
  abstract =     "Existing single-stream logging schemes are unsuitable
                 for in-memory database management systems (DBMSs) as
                 the single log is often a performance bottleneck. To
                 overcome this problem, we present Taurus, an efficient
                 parallel logging scheme that uses \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Paul:2020:IEE,
  author =       "Johns Paul and Bingsheng He and Shengliang Lu and
                 Chiew Tong Lau",
  title =        "Improving execution efficiency of just-in-time
                 compilation based query processing on {GPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "202--214",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425890",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425890",
  abstract =     "In recent years, we have witnessed significant efforts
                 to improve the performance of Online Analytical
                 Processing (OLAP) on graphics processing units (GPUs).
                 Most existing studies have focused on improving memory
                 efficiency since memory stalls can play \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:PTS,
  author =       "Shuang Wang and Hakan Ferhatosmanoglu",
  title =        "{PPQ}-trajectory: spatio-temporal quantization for
                 querying in large trajectory repositories",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "215--227",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425891",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425891",
  abstract =     "We present PPQ-trajectory, a spatio-temporal
                 quantization based solution for querying large dynamic
                 trajectory data. PPQ-trajectory includes a
                 partition-wise predictive quantizer (PPQ) that
                 generates an error-bounded codebook with
                 autocorrelation and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hu:2020:ADP,
  author =       "Xiao Hu and Shouzhuo Sun and Shweta Patwa and Debmalya
                 Panigrahi and Sudeepa Roy",
  title =        "Aggregated deletion propagation for counting
                 conjunctive query answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "2",
  pages =        "228--240",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3425879.3425892",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:03 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3425879.3425892",
  abstract =     "We investigate the computational complexity of
                 minimizing the source side-effect in order to remove a
                 given number of tuples from the output of a conjunctive
                 query. This is a variant of the well-studied deletion
                 propagation problem, the difference being \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Luo:2020:BMW,
  author =       "Chen Luo and Michael J. Carey",
  title =        "Breaking down memory walls: adaptive memory management
                 in {LSM}-based storage systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "241--254",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442425",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442425",
  abstract =     "Log-Structured Merge-trees (LSM-trees) have been
                 widely used in modern NoSQL systems. Due to their
                 out-of-place update design, LSM-trees have introduced
                 memory walls among the memory components of multiple
                 LSM-trees and between the write memory and the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Karlas:2020:NNC,
  author =       "Bojan Karlas and Peng Li and Renzhi Wu and Nezihe
                 Merve G{\"u}rel and Xu Chu and Wentao Wu and Ce Zhang",
  title =        "Nearest neighbor classifiers over incomplete
                 information: from certain answers to certain
                 predictions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "255--267",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442426",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442426",
  abstract =     "Machine learning (ML) applications have been thriving
                 recently, largely attributed to the increasing
                 availability of data. However, inconsistency and
                 incomplete information are ubiquitous in real-world
                 datasets, and their impact on ML applications
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kingsbury:2020:EII,
  author =       "Kyle Kingsbury and Peter Alvaro",
  title =        "{Elle}: inferring isolation anomalies from
                 experimental observations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "268--280",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442427",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442427",
  abstract =     "Users who care about their data store it in databases,
                 which (at least in principle) guarantee some form of
                 transactional isolation. However, experience shows that
                 many databases do not provide the isolation guarantees
                 they claim. With the recent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kiefer:2020:SGF,
  author =       "Martin Kiefer and Ilias Poulakis and Sebastian
                 Bre{\ss} and Volker Markl",
  title =        "{Scotch}: generating {FPGA}-accelerators for sketching
                 at line rate",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "281--293",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442428",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442428",
  abstract =     "Sketching algorithms are a powerful tool for
                 single-pass data summarization. Their numerous
                 applications include approximate query processing,
                 machine learning, and large-scale network monitoring.
                 In the presence of high-bandwidth interconnects or in-.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Khayati:2020:OOR,
  author =       "Mourad Khayati and Ines Arous and Zakhar Tymchenko and
                 Philippe Cudr{\'e}-Mauroux",
  title =        "{ORBITS}: online recovery of missing values in
                 multiple time series streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "294--306",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442429",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442429",
  abstract =     "With the emergence of the Internet of Things (IoT),
                 time series streams have become ubiquitous in our daily
                 life. Recording such data is rarely a perfect process,
                 as sensor failures frequently occur, yielding
                 occasional blocks of data that go missing in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Deng:2020:TTU,
  author =       "Xiang Deng and Huan Sun and Alyssa Lees and You Wu and
                 Cong Yu",
  title =        "{TURL}: table understanding through representation
                 learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "307--319",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442430",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442430",
  abstract =     "Relational tables on the Web store a vast amount of
                 knowledge. Owing to the wealth of such tables, there
                 has been tremendous progress on a variety of tasks in
                 the area of table understanding. However, existing work
                 generally relies on heavily-engineered \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Guo:2020:EUD,
  author =       "Long Guo and Lifeng Hua and Rongfei Jia and Fei Fang
                 and Binqiang Zhao and Bin Cui",
  title =        "{EdgeDIPN}: a unified deep intent prediction network
                 deployed at the edge",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "320--328",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442431",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442431",
  abstract =     "With the rapid growth of e-commerce in recent years,
                 e-commerce platforms are becoming a primary place for
                 people to find, compare and ultimately purchase
                 products. To improve online shopping experience for
                 consumers and increase sales for sellers, it \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2020:LCW,
  author =       "Yiming Lin and Daokun Jiang and Roberto Yus and
                 Georgios Bouloukakis and Andrew Chio and Sharad
                 Mehrotra and Nalini Venkatasubramanian",
  title =        "{Locater}: cleaning {WiFi} connectivity datasets for
                 semantic localization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "329--341",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442432",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442432",
  abstract =     "This paper explores the data cleaning challenges that
                 arise in using WiFi connectivity data to locate users
                 to semantic indoor locations such as buildings,
                 regions, rooms. WiFi connectivity data consists of
                 sporadic connections between devices and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2020:MMT,
  author =       "Hao Liu and Jindong Han and Yanjie Fu and Jingbo Zhou
                 and Xinjiang Lu and Hui Xiong",
  title =        "Multi-modal transportation recommendation with unified
                 route representation learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "342--350",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442433",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442433",
  abstract =     "Multi-modal transportation recommendation aims to
                 provide the most appropriate travel route with various
                 transportation modes according to certain criteria.
                 After analyzing large-scale navigation data, we find
                 that route representations exhibit two \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2020:DDF,
  author =       "Yue Wang and Ruiqi Xu and Zonghao Feng and Yulin Che
                 and Lei Chen and Qiong Luo and Rui Mao",
  title =        "{DISK}: a distributed framework for single-source
                 {SimRank} with accuracy guarantee",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "351--363",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442434",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442434",
  abstract =     "Measuring similarities among different nodes is
                 important in graph analysis. SimRank is one of the most
                 popular similarity measures. Given a graph G ( V, E )
                 and a source node u, a single-source SimRank query
                 returns the similarities between u and each \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Didona:2020:TBU,
  author =       "Diego Didona and Nikolas Ioannou and Radu Stoica and
                 Kornilios Kourtis",
  title =        "Toward a better understanding and evaluation of tree
                 structures on flash {SSDs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "364--377",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442435",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442435",
  abstract =     "Solid-state drives (SSDs) are extensively used to
                 deploy persistent data stores, as they provide low
                 latency random access, high write throughput, high data
                 density, and low cost. Tree-based data structures are
                 widely used to build persistent data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2020:AMD,
  author =       "Jianyu Yang and Tianhao Wang and Ninghui Li and Xiang
                 Cheng and Sen Su",
  title =        "Answering multi-dimensional range queries under local
                 differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "378--390",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442436",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442436",
  abstract =     "In this paper, we tackle the problem of answering
                 multi-dimensional range queries under local
                 differential privacy. There are three key technical
                 challenges: capturing the correlations among
                 attributes, avoiding the curse of dimensionality, and
                 dealing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Palyvos-Giannas:2020:ASF,
  author =       "Dimitris Palyvos-Giannas and Bastian Havers and Marina
                 Papatriantafilou and Vincenzo Gulisano",
  title =        "{Ananke}: a streaming framework for live forward
                 provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "391--403",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442437",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442437",
  abstract =     "Data streaming enables online monitoring of large and
                 continuous event streams in Cyber-Physical Systems
                 (CPSs). In such scenarios, fine-grained backward
                 provenance tools can connect streaming query results to
                 the source data producing them, allowing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lakhotia:2020:RRC,
  author =       "Kartik Lakhotia and Rajgopal Kannan and Viktor
                 Prasanna and Cesar A. F. {De Rose}",
  title =        "{RECEIPT}: refine coarse-grained independent tasks for
                 parallel tip decomposition of bipartite graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "404--417",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442438",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442438",
  abstract =     "Tip decomposition is a crucial kernel for mining dense
                 subgraphs in bipartite networks, with applications in
                 spam detection, analysis of affiliation networks etc.
                 It creates a hierarchy of vertex-induced subgraphs with
                 varying densities determined by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Deep:2020:CEW,
  author =       "Shaleen Deep and Anja Gruenheid and Paraschos Koutris
                 and Jeffrey Naughton and Stratis Viglas",
  title =        "Comprehensive and efficient workload compression",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "418--430",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442439",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442439",
  abstract =     "This work studies the problem of constructing a
                 representative workload from a given input analytical
                 query workload where the former serves as an
                 approximation with guarantees of the latter. We discuss
                 our work in the context of workload analysis and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{He:2020:CCO,
  author =       "Yongjun He and Jiacheng Lu and Tianzheng Wang",
  title =        "{CoroBase}: coroutine-oriented main-memory database
                 engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "431--444",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442440",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442440",
  abstract =     "Data stalls are a major overhead in main-memory
                 database engines due to the use of pointer-rich data
                 structures. Lightweight coroutines ease the
                 implementation of software prefetching to hide data
                 stalls by overlapping computation and asynchronous data
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Smith:2020:SQN,
  author =       "Jaclyn Smith and Michael Benedikt and Milos Nikolic
                 and Amir Shaikhha",
  title =        "Scalable querying of nested data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "3",
  pages =        "445--457",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.5555/3430915.3442441",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 15 05:34:04 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.5555/3430915.3442441",
  abstract =     "While large-scale distributed data processing
                 platforms have become an attractive target for query
                 processing, these systems are problematic for
                 applications that deal with nested collections.
                 Programmers are forced either to perform non-trivial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gong:2020:SCE,
  author =       "Long Gong and Ziheng Liu and Liang Liu and Jun Xu and
                 Mitsunori Ogihara and Tong Yang",
  title =        "Space- and computationally-efficient set
                 reconciliation via parity bitmap sketch {(PBS)}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "458--470",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436906",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436906",
  abstract =     "Set reconciliation is a fundamental algorithmic
                 problem that arises in many networking, system, and
                 database applications. In this problem, two large sets
                 A and B of objects (bitcoins, files, records, etc.) are
                 stored respectively at two different \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shetiya:2020:AAS,
  author =       "Suraj Shetiya and Saravanan Thirumuruganathan and Nick
                 Koudas and Gautam Das",
  title =        "{Astrid}: accurate selectivity estimation for string
                 predicates using deep learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "471--484",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436907",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436907",
  abstract =     "Accurate selectivity estimation for string predicates
                 is a long-standing research challenge in databases.
                 Supporting pattern matching on strings (such as prefix,
                 substring, and suffix) makes this problem much more
                 challenging, thereby necessitating a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zheng:2020:CTR,
  author =       "Nan Zheng and Zachary G. Ives",
  title =        "Compact, tamper-resistant archival of fine-grained
                 provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "485--497",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436909",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436909",
  abstract =     "Data provenance tools aim to facilitate reproducible
                 data science and auditable data analyses, by tracking
                 the processes and inputs responsible for each result of
                 an analysis. Fine-grained provenance further enables
                 sophisticated reasoning about why \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Muller:2020:RDI,
  author =       "Ingo M{\"u}ller and Ghislain Fourny and Stefan
                 Irimescu and Can Berker Cikis and Gustavo Alonso",
  title =        "{Rumble}: data independence for large messy data
                 sets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "498--506",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436910",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436910",
  abstract =     "This paper introduces Rumble, a query execution engine
                 for large, heterogeneous, and nested collections of
                 JSON objects built on top of Apache Spark. While data
                 sets of this type are more and more wide-spread, most
                 existing tools are built around a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chapman:2020:CQF,
  author =       "Adriane Chapman and Paolo Missier and Giulia Simonelli
                 and Riccardo Torlone",
  title =        "Capturing and querying fine-grained provenance of
                 preprocessing pipelines in data science",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "507--520",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436911",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436911",
  abstract =     "Data processing pipelines that are designed to clean,
                 transform and alter data in preparation for learning
                 predictive models, have an impact on those models'
                 accuracy and performance, as well on other properties,
                 such as model fairness. It is therefore \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Farias:2020:LDD,
  author =       "Victor A. E. Farias and Felipe T. Brito and Cheryl
                 Flynn and Javam C. Machado and Subhabrata Majumdar and
                 Divesh Srivastava",
  title =        "Local dampening: differential privacy for non-numeric
                 queries via local sensitivity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "521--533",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436912",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436912",
  abstract =     "Differential privacy is the state-of-the-art formal
                 definition for data release under strong privacy
                 guarantees. A variety of mechanisms have been proposed
                 in the literature for releasing the noisy output of
                 numeric queries (e.g., using the Laplace \ldots{}).",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2020:MDS,
  author =       "Tianyu Li and Matthew Butrovich and Amadou Ngom and
                 Wan Shen Lim and Wes McKinney and Andrew Pavlo",
  title =        "Mainlining databases: supporting fast transactional
                 workloads on universal columnar data file formats",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "534--546",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436913",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436913",
  abstract =     "The proliferation of modern data processing tools has
                 given rise to open-source columnar data formats. These
                 formats help organizations avoid repeated conversion of
                 data to a new format for each application. However,
                 these formats are read-only, and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2020:AEC,
  author =       "Shengliang Lu and Bingsheng He and Yuchen Li and Hao
                 Fu",
  title =        "Accelerating exact constrained shortest paths on
                 {GPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "547--559",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436914",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436914",
  abstract =     "The recently emerging applications such as
                 software-defined networks and autonomous vehicles
                 require efficient and exact solutions for constrained
                 shortest paths (CSP), which finds the shortest path in
                 a graph while satisfying some user-defined \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mo:2020:TEW,
  author =       "Songsong Mo and Zhifeng Bao and Ping Zhang and Zhiyong
                 Peng",
  title =        "Towards an efficient weighted random walk domination",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "560--572",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436915",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436915",
  abstract =     "In this paper, we propose and study a new problem
                 called the weighted random walk domination. Given a
                 weighted graph G ( V, E ) and a budget B of the
                 weighted random walk, it aims to find a k -size set S,
                 which can minimize the total costs of the remaining
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Guo:2020:SMM,
  author =       "Guimu Guo and Da Yan and M. Tamer {\"O}zsu and Zhe
                 Jiang and Jalal Khalil",
  title =        "Scalable mining of maximal quasi-cliques: an
                 algorithm-system codesign approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "573--585",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436916",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436916",
  abstract =     "Given a user-specified minimum degree threshold $
                 \gamma $, a $ \gamma $-quasiclique is a subgraph $ g =
                 (V_g, E_g)$ where each vertex $ \nu \in V_g$ connects
                 to at least $ \gamma $ fraction of the other vertices
                 (i.e., $ \lceil \gamma \cdot (|V_g| - 1) \rceil $
                 vertices) in $g$. Quasi-clique is one of the most
                 natural \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kokoris-Kogias:2020:CPD,
  author =       "Eleftherios Kokoris-Kogias and Enis Ceyhun Alp and
                 Linus Gasser and Philipp Jovanovic and Ewa Syta and
                 Bryan Ford",
  title =        "{CALYPSO}: private data management for decentralized
                 ledgers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "586--599",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436917",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436917",
  abstract =     "Distributed ledgers provide high availability and
                 integrity, making them a key enabler for practical and
                 secure computation of distributed workloads among
                 mutually distrustful parties. Many practical
                 applications also require strong confidentiality,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Deeds:2020:SFL,
  author =       "Kyle Deeds and Brian Hentschel and Stratos Idreos",
  title =        "Stacked filters: learning to filter by structure",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "600--612",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436919",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436919",
  abstract =     "We present Stacked Filters, a new probabilistic filter
                 which is fast and robust similar to query-agnostic
                 filters (such as Bloom and Cuckoo filters), and at the
                 same time brings low false positive rates and sizes
                 similar to classifier-based filters \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Banerjee:2020:MSW,
  author =       "Prithu Banerjee and Wei Chen and Laks V. S.
                 Lakshmanan",
  title =        "Maximizing social welfare in a competitive diffusion
                 model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "613--625",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436920",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436920",
  abstract =     "Influence maximization (IM) has garnered a lot of
                 attention in the literature owing to applications such
                 as viral marketing and infection containment. It aims
                 to select a small number of seed users to adopt an item
                 such that adoption propagates to a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gugnani:2020:UIR,
  author =       "Shashank Gugnani and Arjun Kashyap and Xiaoyi Lu",
  title =        "Understanding the idiosyncrasies of real persistent
                 memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "626--639",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436921",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436921",
  abstract =     "High capacity persistent memory (PMEM) is finally
                 commercially available in the form of Intel's Optane DC
                 Persistent Memory Module (DCPMM). Researchers have
                 raced to evaluate and understand the performance of
                 DCPMM itself as well as systems and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gale:2020:EMR,
  author =       "Abraham Gale and Am{\'e}lie Marian",
  title =        "Explaining monotonic ranking functions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "640--652",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436922",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436922",
  abstract =     "Ranking functions are commonly used to assist in
                 decision-making in a wide variety of applications. As
                 the general public realizes the significant societal
                 impacts of the widespread use of algorithms in
                 decision-making, there has been a push towards
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dhulipala:2020:CFS,
  author =       "Laxman Dhulipala and Changwan Hong and Julian Shun",
  title =        "{ConnectIt}: a framework for static and incremental
                 parallel graph connectivity algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "653--667",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436923",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436923",
  abstract =     "Connected components is a fundamental kernel in graph
                 applications. The fastest existing multicore algorithms
                 for solving graph connectivity are based on some form
                 of edge sampling and/or linking and compressing trees.
                 However, many combinations of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kouadri:2020:QSA,
  author =       "Wissam Mammar Kouadri and Mourad Ouziri and Salima
                 Benbernou and Karima Echihabi and Themis Palpanas and
                 Iheb {Ben Amor}",
  title =        "Quality of sentiment analysis tools: the reasons of
                 inconsistency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "668--681",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436924",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436924",
  abstract =     "In this paper, we present a comprehensive study that
                 evaluates six state-of-the-art sentiment analysis tools
                 on five public datasets, based on the quality of
                 predictive results in the presence of semantically
                 equivalent documents, i.e., how consistent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Garcia:2020:HLM,
  author =       "Rolando Garcia and Eric Liu and Vikram Sreekanti and
                 Bobby Yan and Anusha Dandamudi and Joseph E. Gonzalez
                 and Joseph M. Hellerstein and Koushik Sen",
  title =        "Hindsight logging for model training",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "682--693",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436925",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436925",
  abstract =     "In modern Machine Learning, model training is an
                 iterative, experimental process that can consume
                 enormous computation resources and developer time. To
                 aid in that process, experienced model developers log
                 and visualize program variables during training
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jiang:2020:SSI,
  author =       "Lin Jiang and Junqiao Qiu and Zhijia Zhao",
  title =        "Scalable structural index construction for {JSON}
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "694--707",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436926",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436926",
  abstract =     "JavaScript Object Notation (JSON) and its variants
                 have gained great popularity in recent years.
                 Unfortunately, the performance of their analytics is
                 often dragged down by the expensive JSON parsing. To
                 address this, recent work has shown that building
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rui:2020:EJA,
  author =       "Ran Rui and Hao Li and Yi-Cheng Tu",
  title =        "Efficient join algorithms for large database tables in
                 a multi-{GPU} environment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "4",
  pages =        "708--720",
  month =        dec,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3436905.3436927",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Feb 23 08:32:42 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3436905.3436927",
  abstract =     "Relational join processing is one of the core
                 functionalities in database management systems. It has
                 been demonstrated that GPUs as a general-purpose
                 parallel computing platform is very promising in
                 processing relational joins. However, join algorithms
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yan:2021:FAP,
  author =       "Shuyuan Yan and Bolin Ding and Wei Guo and Jingren
                 Zhou and Zhewei Wei and Xiaowei Jiang and Sheng Xu",
  title =        "{FlashP}: an analytical pipeline for real-time
                 forecasting of time-series relational data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "721--729",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446096",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446096",
  abstract =     "Interactive response time is important in analytical
                 pipelines for users to explore a sufficient number of
                 possibilities and make informed business decisions. We
                 consider a forecasting pipeline with large volumes of
                 high-dimensional time series data. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Duong:2021:ESS,
  author =       "Chi Thang Duong and Trung Dung Hoang and Hongzhi Yin
                 and Matthias Weidlich and Quoc Viet Hung Nguyen and
                 Karl Aberer",
  title =        "Efficient streaming subgraph isomorphism with graph
                 neural networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "730--742",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446097",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446097",
  abstract =     "Queries to detect isomorphic subgraphs are important
                 in graph-based data management. While the problem of
                 subgraph isomorphism search has received considerable
                 attention for the static setting of a single query, or
                 a batch thereof, existing approaches \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2021:EBC,
  author =       "Yi Lu and Xiangyao Yu and Lei Cao and Samuel Madden",
  title =        "Epoch-based commit and replication in distributed
                 {OLTP} databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "743--756",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446098",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446098",
  abstract =     "Many modern data-oriented applications are built on
                 top of distributed OLTP databases for both scalability
                 and high availability. Such distributed databases
                 enforce atomicity, durability, and consistency through
                 two-phase commit (2PC) and synchronous \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2021:HCM,
  author =       "Zhe Lin and Fan Zhang and Xuemin Lin and Wenjie Zhang
                 and Zhihong Tian",
  title =        "Hierarchical core maintenance on large dynamic
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "757--770",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446099",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446099",
  abstract =     "The model of k-core and its decomposition have been
                 applied in various areas, such as social networks, the
                 world wide web, and biology. A graph can be decomposed
                 into an elegant k-core hierarchy to facilitate
                 cohesive subgraph discovery and network \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mohan:2021:AMD,
  author =       "Jayashree Mohan and Amar Phanishayee and Ashish
                 Raniwala and Vijay Chidambaram",
  title =        "Analyzing and mitigating data stalls in {DNN}
                 training",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "771--784",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446100",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446100",
  abstract =     "Training Deep Neural Networks (DNNs) is
                 resource-intensive and time-consuming. While prior
                 research has explored many different ways of reducing
                 DNN training time, the impact of input data pipeline,
                 i.e., fetching raw data items from storage and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hu:2021:PMH,
  author =       "Daokun Hu and Zhiwen Chen and Jianbing Wu and Jianhua
                 Sun and Hao Chen",
  title =        "Persistent memory hash indexes: an experimental
                 evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "785--798",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446101",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446101",
  abstract =     "Persistent memory (PM) is increasingly being leveraged
                 to build hash-based indexing structures featuring cheap
                 persistence, high performance, and instant recovery,
                 especially with the recent release of Intel Optane DC
                 Persistent Memory Modules. However, most of them are
                 evaluated on DRAM-based emulators with unreal
                 assumptions, or focus on the evaluation of specific
                 metrics with important properties sidestepped. Thus, it
                 is essential to understand how well the proposed hash
                 indexes perform on real PM and how they differentiate
                 from each other if a wider range of performance metrics
                 are considered. To this end, this paper provides a
                 comprehensive evaluation of persistent hash tables. In
                 particular, we focus on the evaluation of six
                 state-of-the-art hash tables including Level hashing,
                 CCEH, Dash, PCLHT, Clevel, and SOFT, with real PM
                 hardware. Our evaluation was conducted using a unified
                 benchmarking framework and representative workloads.
                 Besides characterizing common performance properties,
                 we also explore how hardware configurations (such as PM
                 bandwidth, CPU instructions, and NUMA) affect the
                 performance of PM-based hash tables. With our in-depth
                 analysis, we identify design trade-offs and good
                 paradigms in prior arts, and suggest desirable
                 optimizations and directions for the future development
                 of PM-based hash tables.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2021:OMD,
  author =       "Cheng Chen and Jun Yang and Mian Lu and Taize Wang and
                 Zhao Zheng and Yuqiang Chen and Wenyuan Dai and
                 Bingsheng He and Weng-Fai Wong and Guoan Wu and Yuping
                 Zhao and Andy Rudoff",
  title =        "Optimizing in-memory database engine for {AI}-powered
                 on-line decision augmentation using persistent memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "799--812",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446102",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446102",
  abstract =     "On-line decision augmentation (OLDA) has been
                 considered as a promising paradigm for real-time
                 decision making powered by Artificial Intelligence
                 (AI). OLDA has been widely used in many applications
                 such as real-time fraud detection, personalized
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Usta:2021:DMT,
  author =       "Arif Usta and Akifhan Karakayali and {\"O}zg{\"u}r
                 Ulusoy",
  title =        "{DBTagger}: multi-task learning for keyword mapping in
                 {NLIDBs} using bi-directional recurrent neural
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "813--821",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446103",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446103",
  abstract =     "Translating Natural Language Queries (NLQs) to
                 Structured Query Language (SQL) in interfaces deployed
                 in relational databases is a challenging task, which
                 has been widely studied in database community recently.
                 Conventional rule based systems utilize \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sarkhel:2021:IIE,
  author =       "Ritesh Sarkhel and Arnab Nandi",
  title =        "Improving information extraction from visually rich
                 documents using visual span representations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "822--834",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446104",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446104",
  abstract =     "Along with textual content, visual features play an
                 essential role in the semantics of visually rich
                 documents. Information extraction (IE) tasks perform
                 poorly on these documents if these visual cues are not
                 taken into account. In this paper, we \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:ZHT,
  author =       "Gang Liu and Leying Chen and Shimin Chen",
  title =        "{Zen}: a high-throughput log-free {OLTP} engine for
                 non-volatile main memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "835--848",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446105",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446105",
  abstract =     "Emerging Nonvolatile memory (NVM) technologies like
                 3D XPoint promise significant performance potential for
                 OLTP databases. However, transactional databases need
                 to be redesigned because the key assumptions that
                 non-volatile storage is orders of magnitude slower than
                 DRAM and only supports blocked-oriented access have
                 changed. NVMs are byte-addressable and almost as fast
                 as DRAM. The capacity of NVM is much (4-16x) larger
                 than DRAM. Such NVM characteristics make it possible to
                 build OLTP database entirely in NVM main
                 memory.\par

                 This paper studies the structure of OLTP engines with
                 hybrid NVM and DRAM memory. We observe three challenges
                 to design an OLTP engine for NVM: tuple metadata
                 modifications, NVM write redundancy, and NVM space
                 management. We propose Zen, a high-throughput log-free
                 OLTP engine for NVM. Zen addresses the three design
                 challenges with three novel techniques: metadata
                 enhanced tuple cache, log-free persistent transactions,
                 and light-weight NVM space management. Experimental
                 results on a real machine equipped with Intel Optane DC
                 Persistent Memory show that Zen achieves up to 10.1x
                 improvement compared with existing solutions to run an
                 OLTP database as large as the size of NVM while
                 achieving fast failure recovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ji:2021:DPB,
  author =       "Tianxi Ji and Pan Li and Emre Yilmaz and Erman Ayday
                 and Yanfang (Fanny) Ye and Jinyuan Sun",
  title =        "Differentially private binary- and matrix-valued data
                 query: an {XOR} mechanism",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "5",
  pages =        "849--862",
  month =        jan,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3446095.3446106",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 24 11:29:44 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3446095.3446106",
  abstract =     "Differential privacy has been widely adopted to
                 release continuous- and scalar-valued information on a
                 database without compromising the privacy of individual
                 data records in it. The problem of querying binary- and
                 matrix-valued information on a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Nakandala:2021:ECD,
  author =       "Supun Nakandala and Yuhao Zhang and Arun Kumar",
  title =        "Errata for {``Cerebro: a data system for optimized
                 deep learning model selection''}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "863--863",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447691",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See \cite{Nakandala:2020:CDS}.",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447691",
  abstract =     "We discovered that there was an inconsistency in the
                 communication cost formulation for the decentralized
                 fine-grained training method in Table 2 of our paper
                 [1]. We used Horovod as the archetype for decentralized
                 fine-grained approaches, and its \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yin:2021:PBD,
  author =       "Lujia Yin and Yiming Zhang and Zhaoning Zhang and
                 Yuxing Peng and Peng Zhao",
  title =        "{ParaX}: boosting deep learning for big data analytics
                 on many-core {CPUs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "864--877",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447692",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447692",
  abstract =     "Despite the fact that GPUs and accelerators are more
                 efficient in deep learning (DL), commercial clouds like
                 Facebook and Amazon now heavily use CPUs in DL
                 computation because there are large numbers of CPUs
                 which would otherwise sit idle during off-peak \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cai:2021:OTF,
  author =       "Walter Cai and Philip A. Bernstein and Wentao Wu and
                 Badrish Chandramouli",
  title =        "Optimization of threshold functions over streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "878--889",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447693",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447693",
  abstract =     "A common stream processing application is alerting,
                 where the data stream management system (DSMS)
                 continuously evaluates a threshold function over
                 incoming streams. If the threshold is crossed, the DSMS
                 raises an alarm. The threshold function is often
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2021:BCI,
  author =       "Xuliang Zhu and Xin Huang and Byron Choi and Jiaxin
                 Jiang and Zhaonian Zou and Jianliang Xu",
  title =        "Budget constrained interactive search for multiple
                 targets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "890--902",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447694",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447694",
  abstract =     "Interactive graph search leverages human intelligence
                 to categorize target labels in a hierarchy, which is
                 useful for image classification, product
                 categorization, and database search. However, many
                 existing interactive graph search studies aim at
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2021:SMK,
  author =       "Yangjun Chen and Hoang Hai Nguyen",
  title =        "On the string matching with $k$ differences in {DNA}
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "903--915",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447695",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447695",
  abstract =     "In this paper, we discuss an efficient and effective
                 index mechanism for the string matching with k
                 differences, by which we will find all the substrings
                 of a target string y of length n that align with a
                 pattern string x of length m with not more than
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fujiwara:2021:FAA,
  author =       "Yasuhiro Fujiwara and Sekitoshi Kanai and Yasutoshi
                 Ida and Atsutoshi Kumagai and Naonori Ueda",
  title =        "Fast algorithm for anchor graph hashing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "916--928",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447696",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447696",
  abstract =     "Anchor graph hashing is used in many applications such
                 as cancer detection, web page classification, and drug
                 discovery. It computes the hash codes from the
                 eigenvectors of the matrix representing the
                 similarities between data points and anchor points;
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:ACG,
  author =       "Wangda Zhang and Junyoung Kim and Kenneth A. Ross and
                 Eric Sedlar and Lukas Stadler",
  title =        "Adaptive code generation for data-intensive
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "929--942",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447697",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447697",
  abstract =     "Modern database management systems employ
                 sophisticated query optimization techniques that enable
                 the generation of efficient plans for queries over very
                 large data sets. A variety of other applications also
                 process large data sets, but cannot leverage \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tsamoura:2021:MKB,
  author =       "Efthymia Tsamoura and David Carral and Enrico Malizia
                 and Jacopo Urbani",
  title =        "Materializing knowledge bases via trigger graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "943--956",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447699",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447699",
  abstract =     "The chase is a well-established family of algorithms
                 used to materialize Knowledge Bases (KBs) for tasks
                 like query answering under dependencies or data
                 cleaning. A general problem of chase algorithms is that
                 they might perform redundant computations. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:DEE,
  author =       "Jinfei Liu and Jian Lou and Junxu Liu and Li Xiong and
                 Jian Pei and Jimeng Sun",
  title =        "{Dealer}: an end-to-end model marketplace with
                 differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "957--969",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447700",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447700",
  abstract =     "Data-driven machine learning has become ubiquitous. A
                 marketplace for machine learning models connects data
                 owners and model buyers, and can dramatically
                 facilitate data-driven machine learning applications.
                 In this paper, we take a formal data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rahman:2021:NIS,
  author =       "Sajjadur Rahman and Mangesh Bendre and Yuyang Liu and
                 Shichu Zhu and Zhaoyuan Su and Karrie Karahalios and
                 Aditya G. Parameswaran",
  title =        "{NOAH}: interactive spreadsheet exploration with
                 dynamic hierarchical overviews",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "970--983",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447701",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447701",
  abstract =     "Spreadsheet systems are by far the most popular
                 platform for data exploration on the planet, supporting
                 millions of rows of data. However, exploring
                 spreadsheets that are this large via operations such as
                 scrolling or issuing formulae can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2021:EBT,
  author =       "Yixing Yang and Yixiang Fang and Maria E. Orlowska and
                 Wenjie Zhang and Xuemin Lin",
  title =        "Efficient bi-triangle counting for large bipartite
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "984--996",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447702",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447702",
  abstract =     "A bipartite network is a network with two disjoint
                 vertex sets and its edges only exist between vertices
                 from different sets. It has received much interest
                 since it can be used to model the relationship between
                 two different sets of objects in many \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tata:2021:GSE,
  author =       "Sandeep Tata and Navneet Potti and James B. Wendt and
                 Lauro Beltr{\~a}o Costa and Marc Najork and Beliz
                 Gunel",
  title =        "{Glean}: structured extractions from templatic
                 documents",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "997--1005",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447703",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447703",
  abstract =     "Extracting structured information from templatic
                 documents is an important problem with the potential to
                 automate many real-world business workflows such as
                 payment, procurement, and payroll. The core challenge
                 is that such documents can be laid out in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gao:2021:IGL,
  author =       "Jun Gao and Jiazun Chen and Zhao Li and Ji Zhang",
  title =        "{ICS-GNN}: lightweight interactive community search
                 via graph neural network",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1006--1018",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447704",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447704",
  abstract =     "Searching a community containing a given query vertex
                 in an online social network enjoys wide applications
                 like recommendation, team organization, etc. When
                 applied to real-life networks, the existing approaches
                 face two major limitations. First, they \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2021:BEN,
  author =       "Yuanyuan Sun and Sheng Wang and Huorong Li and Feifei
                 Li",
  title =        "Building enclave-native storage engines for practical
                 encrypted databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1019--1032",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447705",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447705",
  abstract =     "Data confidentiality is one of the biggest concerns
                 that hinders enterprise customers from moving their
                 workloads to the cloud. Thanks to the trusted execution
                 environment (TEE), it is now feasible to build
                 encrypted databases in the enclave that can \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Thorne:2021:NLP,
  author =       "James Thorne and Majid Yazdani and Marzieh Saeidi and
                 Fabrizio Silvestri and Sebastian Riedel and Alon
                 Halevy",
  title =        "From natural language processing to neural databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1033--1039",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447706",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447706",
  abstract =     "In recent years, neural networks have shown impressive
                 performance gains on long-standing AI problems, such as
                 answering queries from text and machine translation.
                 These advances raise the question of whether neural
                 nets can be used at the core of query \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:RER,
  author =       "Haibo Wang and Chaoyi Ma and Olufemi O. Odegbile and
                 Shigang Chen and Jih-Kwon Peir",
  title =        "Randomized error removal for online spread estimation
                 in data streaming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1040--1052",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447707",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447707",
  abstract =     "Measuring flow spread in real time from large,
                 high-rate data streams has numerous practical
                 applications, where a data stream is modeled as a
                 sequence of data items from different flows and the
                 spread of a flow is the number of distinct items in the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{DeLeo:2021:TAS,
  author =       "Dean {De Leo} and Peter Boncz",
  title =        "{Teseo} and the analysis of structural dynamic
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1053--1066",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447708",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See errata \cite{Leo:2021:ETA}.",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447708",
  abstract =     "We present Teseo, a new system for the storage and
                 analysis of dynamic structural graphs in main-memory
                 and the addition of transactional support. Teseo
                 introduces a novel design based on sparse arrays, large
                 arrays interleaved with gaps, and a fat tree
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gubner:2021:CDS,
  author =       "Tim Gubner and Peter Boncz",
  title =        "Charting the design space of query execution using
                 {VOILA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1067--1079",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447709",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447709",
  abstract =     "Database architecture, while having been studied for
                 four decades now, has delivered only a few designs with
                 well-understood properties. These few are followed by
                 most actual systems. Acquiring more knowledge about the
                 design space is a very time-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:HES,
  author =       "Zhiqi Wang and Jin Xue and Zili Shao",
  title =        "{Heracles}: an efficient storage model and data
                 flushing for performance monitoring timeseries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1080--1092",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447710",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447710",
  abstract =     "Performance-monitoring timeseries systems such as
                 Prometheus and InfluxDB play a critical role in
                 assuring reliability and operationally. These systems
                 commonly adopt a column-oriented storage model, by
                 which timeseries samples from different time-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Macke:2021:FGL,
  author =       "Stephen Macke and Hongpu Gong and Doris Jung-Lin Lee
                 and Andrew Head and Doris Xin and Aditya Parameswaran",
  title =        "Fine-grained lineage for safer notebook interactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1093--1101",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447712",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447712",
  abstract =     "Computational notebooks have emerged as the platform
                 of choice for data science and analytical workflows,
                 enabling rapid iteration and exploration. By keeping
                 intermediate program state in memory and segmenting
                 units of execution into so-called ``cells'', \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tsitsulin:2021:FAG,
  author =       "Anton Tsitsulin and Marina Munkhoeva and Davide Mottin
                 and Panagiotis Karras and Ivan Oseledets and Emmanuel
                 M{\"u}ller",
  title =        "{FREDE}: anytime graph embeddings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1102--1110",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447713",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447713",
  abstract =     "Low-dimensional representations, or embeddings, of a
                 graph's nodes facilitate several practical data science
                 and data engineering tasks. As such embeddings rely,
                 explicitly or implicitly, on a similarity measure among
                 nodes, they require the computation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:AGM,
  author =       "Xiaodong Li and Reynold Cheng and Kevin Chen-Chuan
                 Chang and Caihua Shan and Chenhao Ma and Hongtai Cao",
  title =        "On analyzing graphs with motif-paths",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "6",
  pages =        "1111--1123",
  month =        feb,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3447689.3447714",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:38 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3447689.3447714",
  abstract =     "Path-based solutions have been shown to be useful for
                 various graph analysis tasks, such as link prediction
                 and graph clustering. However, they are no longer
                 adequate for handling complex and gigantic graphs.
                 Recently, motif-based analysis has attracted \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tsaras:2021:CIM,
  author =       "Dimitris Tsaras and George Trimponias and Lefteris
                 Ntaflos and Dimitris Papadias",
  title =        "Collective influence maximization for multiple
                 competing products with an awareness-to-influence
                 model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1124--1136",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450981",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450981",
  abstract =     "Influence maximization (IM) is a fundamental task in
                 social network analysis. Typically, IM aims at
                 selecting a set of seeds for the network that
                 influences the maximum number of individuals. Motivated
                 by practical applications, in this paper we focus
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2021:FGS,
  author =       "Yahui Sun and Xiaokui Xiao and Bin Cui and Saman
                 Halgamuge and Theodoros Lappas and Jun Luo",
  title =        "Finding group {Steiner} trees in graphs with both
                 vertex and edge weights",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1137--1149",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450982",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450982",
  abstract =     "Given an undirected graph and a number of vertex
                 groups, the group Steiner trees problem is to find a
                 tree such that (i) this tree contains at least one
                 vertex in each vertex group; and (ii) the sum of vertex
                 and edge weights in this tree is minimized. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Abeywickrama:2021:OBM,
  author =       "Tenindra Abeywickrama and Victor Liang and Kian-Lee
                 Tan",
  title =        "Optimizing bipartite matching in real-world
                 applications by incremental cost computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1150--1158",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450983",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450983",
  abstract =     "The Kuhn-Munkres (KM) algorithm is a classical
                 combinatorial optimization algorithm that is widely
                 used for minimum cost bipartite matching in many
                 real-world applications, such as transportation. For
                 example, a ride-hailing service may use it to find
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Trummer:2021:CNE,
  author =       "Immanuel Trummer",
  title =        "The case for {NLP}-enhanced database tuning: towards
                 tuning tools that ``read the manual''",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1159--1165",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450984",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450984",
  abstract =     "A large body of knowledge on database tuning is
                 available in the form of natural language text. We
                 propose to leverage natural language processing (NLP)
                 to make that knowledge accessible to automated tuning
                 tools. We describe multiple avenues to exploit
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Maiyya:2021:EUC,
  author =       "Sujaya Maiyya and Faisal Nawab and Divyakant Agrawal
                 and Amr {El Abbadi}",
  title =        "Errata for {``Unifying consensus and atomic commitment
                 for effective cloud data management''}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1166--1166",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450985",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See \cite{Maiyya:2019:UCA}.",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450985",
  abstract =     "This errata article discusses and corrects a minor
                 error in our work published in VLDB 2019. The
                 discrepancy specifically pertains to Algorithms 3 and
                 4. The algorithms presented in the paper are biased
                 towards a commit decision in a specific failure
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Istvan:2021:SDD,
  author =       "Zsolt Istv{\'a}n and Soujanya Ponnapalli and Vijay
                 Chidambaram",
  title =        "Software-defined data protection: low overhead policy
                 compliance at the storage layer is within reach!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1167--1174",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450986",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450986",
  abstract =     "Most modern data processing pipelines run on top of a
                 distributed storage layer, and securing the whole
                 system, and the storage layer in particular, against
                 accidental or malicious misuse is crucial to ensuring
                 compliance to rules and regulations. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:TRT,
  author =       "Tianyi Li and Lu Chen and Christian S. Jensen and
                 Torben Bach Pedersen",
  title =        "{TRACE}: real-time compression of streaming
                 trajectories in road networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1175--1187",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450987",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450987",
  abstract =     "The deployment of vehicle location services generates
                 increasingly massive vehicle trajectory data, which
                 incurs high storage and transmission costs. A range of
                 studies target offline compression to reduce the
                 storage cost. However, to enable online \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Saha:2021:SPC,
  author =       "Arkaprava Saha and Ruben Brokkelkamp and Yllka Velaj
                 and Arijit Khan and Francesco Bonchi",
  title =        "Shortest paths and centrality in uncertain networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1188--1201",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450988",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450988",
  abstract =     "Computing the shortest path between a pair of nodes is
                 a fundamental graph primitive, which has critical
                 applications in vehicle routing, finding functional
                 pathways in biological networks, survivable network
                 design, among many others. In this work, we \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:ADAa,
  author =       "Tongyu Liu and Ju Fan and Yinqing Luo and Nan Tang and
                 Guoliang Li and Xiaoyong Du",
  title =        "Adaptive data augmentation for supervised learning
                 over missing data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1202--1214",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450989",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450989",
  abstract =     "Real-world data is dirty, which causes serious
                 problems in (supervised) machine learning (ML). The
                 widely used practice in such scenario is to first
                 repair the labeled source (a.k.a. train) data using
                 rule-, statistical- or ML-based methods and then use
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2021:KPA,
  author =       "Fuheng Zhao and Sujaya Maiyya and Ryan Wiener and
                 Divyakant Agrawal and Amr {El Abbadi}",
  title =        "{KLL$^\pm $} approximate quantile sketches over
                 dynamic datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1215--1227",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450990",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450990",
  abstract =     "Recently the long standing problem of optimal
                 construction of quantile sketches was resolved by
                 Karnin, Lang, and Liberty using the KLL sketch (FOCS
                 2016). The algorithm for KLL is restricted to online
                 insert operations and no delete operations. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jankov:2021:DNM,
  author =       "Dimitrije Jankov and Binhang Yuan and Shangyu Luo and
                 Chris Jermaine",
  title =        "Distributed numerical and machine learning
                 computations via two-phase execution of aggregated join
                 trees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1228--1240",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450991",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450991",
  abstract =     "When numerical and machine learning (ML) computations
                 are expressed relationally, classical query execution
                 strategies (hash-based joins and aggregations) can do a
                 poor job distributing the computation. In this paper,
                 we propose a two-phase execution \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{VanAken:2021:IML,
  author =       "Dana {Van Aken} and Dongsheng Yang and Sebastien
                 Brillard and Ari Fiorino and Bohan Zhang and Christian
                 Bilien and Andrew Pavlo",
  title =        "An inquiry into machine learning-based automatic
                 configuration tuning services on real-world database
                 management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "7",
  pages =        "1241--1253",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3450980.3450992",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Apr 13 13:43:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3450980.3450992",
  abstract =     "Modern database management systems (DBMS) expose
                 dozens of configurable knobs that control their runtime
                 behavior. Setting these knobs correctly for an
                 application's workload can improve the performance and
                 efficiency of the DBMS. But because of their \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tang:2021:RRP,
  author =       "Nan Tang and Ju Fan and Fangyi Li and Jianhong Tu and
                 Xiaoyong Du and Guoliang Li and Sam Madden and Mourad
                 Ouzzani",
  title =        "{RPT}: relational pre-trained transformer is almost
                 all you need towards democratizing data preparation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1254--1261",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457391",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457391",
  abstract =     "Can AI help automate human-easy but computer-hard data
                 preparation tasks that burden data scientists,
                 practitioners, and crowd workers? We answer this
                 question by presenting RPT, a denoising autoencoder for
                 tuple-to-X models (``X'' could be tuple, token,
                 \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zou:2021:LAP,
  author =       "Jia Zou and Amitabh Das and Pratik Barhate and Arun
                 Iyengar and Binhang Yuan and Dimitrije Jankov and Chris
                 Jermaine",
  title =        "{Lachesis}: automatic partitioning for {UDF}-centric
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1262--1275",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457392",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457392",
  abstract =     "Partitioning is effective in avoiding expensive
                 shuffling operations. However, it remains a significant
                 challenge to automate this process for Big Data
                 analytics workloads that extensively use user defined
                 functions (UDFs), where sub-computations are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2021:ULI,
  author =       "Jiacheng Wu and Yong Zhang and Shimin Chen and Jin
                 Wang and Yu Chen and Chunxiao Xing",
  title =        "Updatable learned index with precise positions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1276--1288",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457393",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457393",
  abstract =     "Index plays an essential role in modern database
                 engines to accelerate the query processing. The new
                 paradigm of ``learned index'' has significantly changed
                 the way of designing index structures in DBMS. The key
                 insight is that indexes could be regarded \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fang:2021:MMS,
  author =       "Ziquan Fang and Lu Pan and Lu Chen and Yuntao Du and
                 Yunjun Gao",
  title =        "{MDTP}: a multi-source deep traffic prediction
                 framework over spatio-temporal trajectory data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1289--1297",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457394",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457394",
  abstract =     "Traffic prediction has drawn increasing attention for
                 its ubiquitous real-life applications in traffic
                 management, urban computing, public safety, and so on.
                 Recently, the availability of massive trajectory data
                 and the success of deep learning motivate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Min:2021:SCS,
  author =       "Seunghwan Min and Sung Gwan Park and Kunsoo Park and
                 Dora Giammarresi and Giuseppe F. Italiano and Wook-Shin
                 Han",
  title =        "Symmetric continuous subgraph matching with
                 bidirectional dynamic programming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1298--1310",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457395",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457395",
  abstract =     "In many real datasets such as social media streams and
                 cyber data sources, graphs change over time through a
                 graph update stream of edge insertions and deletions.
                 Detecting critical patterns in such dynamic graphs
                 plays an important role in various \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Suzuki:2021:ADP,
  author =       "Tomoya Suzuki and Kazuhiro Hiwada and Hirotsugu
                 Kajihara and Shintaro Sano and Shuou Nomura and Tatsuo
                 Shiozawa",
  title =        "Approaching {DRAM} performance by using
                 microsecond-latency flash memory for small-sized random
                 read accesses: a new access method and its graph
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1311--1324",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457397",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457397",
  abstract =     "For applications in which small-sized random accesses
                 frequently occur for datasets that exceed DRAM
                 capacity, placing the datasets on SSD can result in
                 poor application performance. For the read-intensive
                 case we focus on in this paper, low latency \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Orogat:2021:CTB,
  author =       "Abdelghny Orogat and Isabelle Liu and Ahmed El-Roby",
  title =        "{CBench}: towards better evaluation of question
                 answering over knowledge graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1325--1337",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457398",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457398",
  abstract =     "Recently, there has been an increase in the number of
                 knowledge graphs that can be only queried by experts.
                 However, describing questions using structured queries
                 is not straightforward for non-expert users who need to
                 have sufficient knowledge about \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yuan:2021:TRA,
  author =       "Binhang Yuan and Dimitrije Jankov and Jia Zou and
                 Yuxin Tang and Daniel Bourgeois and Chris Jermaine",
  title =        "Tensor relational algebra for distributed machine
                 learning system design",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1338--1350",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457399",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457399",
  abstract =     "We consider the question: what is the abstraction that
                 should be implemented by the computational engine of a
                 machine learning system? Current machine learning
                 systems typically push whole tensors through a series
                 of compute kernels such as matrix \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2021:PDD,
  author =       "Wenfei Fan and Chao Tian and Yanghao Wang and Qiang
                 Yin",
  title =        "Parallel discrepancy detection and incremental
                 detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1351--1364",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457400",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457400",
  abstract =     "This paper studies how to catch duplicates, mismatches
                 and conflicts in the same process. We adopt a class of
                 entity enhancing rules that embed machine learning
                 predicates, unify entity resolution and conflict
                 resolution, and are collectively defined \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:TCA,
  author =       "Tiantian Liu and Huan Li and Hua Lu and Muhammad Aamir
                 Cheema and Lidan Shou",
  title =        "Towards crowd-aware indoor path planning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1365--1377",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457401",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457401",
  abstract =     "Indoor venues accommodate many people who collectively
                 form crowds. Such crowds in turn influence people's
                 routing choices, e.g., people may prefer to avoid
                 crowded rooms when walking from A to B. This paper
                 studies two types of crowd-aware indoor path \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gupta:2021:PES,
  author =       "Surabhi Gupta and Karthik Ramachandra",
  title =        "Procedural extensions of {SQL}: understanding their
                 usage in the wild",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1378--1391",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457402",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457402",
  abstract =     "Procedural extensions of SQL have been in existence
                 for many decades now. However, little is known about
                 their magnitude of usage and their complexity in
                 real-world workloads. Procedural code executing in a
                 RDBMS is known to have inefficiencies and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bharadwaj:2021:DRD,
  author =       "Sagar Bharadwaj and Praveen Gupta and Ranjita Bhagwan
                 and Saikat Guha",
  title =        "Discovering related data at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1392--1400",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457403",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457403",
  abstract =     "Analysts frequently require data from multiple sources
                 for their tasks, but finding these sources is
                 challenging in exabyte-scale data lakes. In this paper,
                 we address this problem for our enterprise's data lake
                 by using machine-learning to identify \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cereda:2021:CCG,
  author =       "Stefano Cereda and Stefano Valladares and Paolo
                 Cremonesi and Stefano Doni",
  title =        "{CGPTuner}: a contextual {Gaussian} process bandit
                 approach for the automatic tuning of {IT}
                 configurations under varying workload conditions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1401--1413",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457404",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457404",
  abstract =     "Properly selecting the configuration of a database
                 management system (DBMS) is essential to increase
                 performance and reduce costs. However, the task is
                 astonishingly tricky due to a large number of tunable
                 configuration parameters and their inter-dependencies
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schiavio:2021:LAI,
  author =       "Filippo Schiavio and Daniele Bonetta and Walter
                 Binder",
  title =        "Language-agnostic integrated queries in a managed
                 polyglot runtime",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1414--1426",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457405",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457405",
  abstract =     "Language-integrated query (LINQ) frameworks offer a
                 convenient programming abstraction for processing
                 in-memory collections of data, allowing developers to
                 concisely express declarative queries using
                 general-purpose programming languages. Existing LINQ
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kulkarni:2021:AHT,
  author =       "Chinmay Kulkarni and Badrish Chandramouli and Ryan
                 Stutsman",
  title =        "Achieving high throughput and elasticity in a
                 larger-than-memory store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1427--1440",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457406",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457406",
  abstract =     "Millions of sensors, mobile applications and machines
                 now generate billions of events. Specialized many-core
                 key-value stores (KVSs) can ingest and index these
                 events at high rates (over 100 Mops/s on one machine)
                 if events are generated on the same \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yao:2021:ESB,
  author =       "Kai Yao and Lijun Chang",
  title =        "Efficient size-bounded community search over large
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "8",
  pages =        "1441--1453",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3457390.3457407",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:31 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3457390.3457407",
  abstract =     "The problem of community search, which aims to find a
                 cohesive subgraph containing user-given query vertices,
                 has been extensively studied recently. Most of the
                 existing studies mainly focus on the cohesiveness of
                 the returned community, while ignoring \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2021:MVA,
  author =       "Jianwen Zhao and Yufei Tao",
  title =        "Minimum vertex augmentation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1454--1466",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461536",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461536",
  abstract =     "This paper introduces a class of graph problems named
                 minimum vertex augmentation (MVA). Given an input graph
                 G where each vertex carries a binary color 0 or 1, we
                 want to flip the colors of the fewest 0-vertices such
                 that the subgraph induced by all \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gaffney:2021:DIS,
  author =       "Kevin P. Gaffney and Robert Claus and Jignesh M.
                 Patel",
  title =        "Database isolation by scheduling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1467--1480",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461537",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461537",
  abstract =     "Transaction isolation is conventionally achieved by
                 restricting access to the physical items in a database.
                 To maximize performance, isolation functionality is
                 often packaged with recovery, I/O, and data access
                 methods in a monolithic transactional \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Park:2021:SSS,
  author =       "Jong-Hyeok Park and Soyee Choi and Gihwan Oh and
                 Sang-Won Lee",
  title =        "{SaS}: {SSD} as {SQL} database system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1481--1488",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461538",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461538",
  abstract =     "Every database engine runs on top of an operating
                 system in the host, strictly separated with the
                 storage. This more-than-half-century-old IHDE
                 (In-Host-Database-Engine) architecture, however,
                 reveals its limitations when run on fast flash memory
                 SSDs. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2021:FFL,
  author =       "Rong Zhu and Ziniu Wu and Yuxing Han and Kai Zeng and
                 Andreas Pfadler and Zhengping Qian and Jingren Zhou and
                 Bin Cui",
  title =        "{FLAT}: fast, lightweight and accurate method for
                 cardinality estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1489--1502",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461539",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461539",
  abstract =     "Query optimizers rely on accurate cardinality
                 estimation (CardEst) to produce good execution plans.
                 The core problem of CardEst is how to model the rich
                 joint distribution of attributes in an accurate and
                 compact manner. Despite decades of research, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chan:2021:FAA,
  author =       "Tsz Nam Chan and Zhe Li and Leong Hou U. and Jianliang
                 Xu and Reynold Cheng",
  title =        "Fast augmentation algorithms for network kernel
                 density visualization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1503--1516",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461540",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461540",
  abstract =     "Network kernel density visualization, or NKDV, has
                 been extensively used to visualize spatial data points
                 in various domains, including traffic accident hotspot
                 detection, crime hotspot detection, disease outbreak
                 detection, and business and urban \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:AAG,
  author =       "Jiawei Wang and Cheng Li and Kai Ma and Jingze Huo and
                 Feng Yan and Xinyu Feng and Yinlong Xu",
  title =        "{AUTOGR}: automated geo-replication with fast system
                 performance and preserved application semantics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1517--1530",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461541",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461541",
  abstract =     "Geo-replication is essential for providing low latency
                 response and quality Internet services. However,
                 designing fast and correct geo-replicated services is
                 challenging due to the complex trade-off between
                 performance and consistency semantics in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:LAD,
  author =       "Qing Liu and Xuliang Zhu and Xin Huang and Jianliang
                 Xu",
  title =        "Local algorithms for distance-generalized core
                 decomposition over large dynamic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1531--1543",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461542",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461542",
  abstract =     "The distance-generalized core, also called (k,
                 h)-core, is defined as the maximal subgraph in which
                 every vertex has at least k vertices at distance no
                 longer than h. Compared with k-core, (k, h)-core can
                 identify more fine-grained subgraphs and, hence,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Benson:2021:VEH,
  author =       "Lawrence Benson and Hendrik Makait and Tilmann Rabl",
  title =        "{Viper}: an efficient hybrid {PMem-DRAM} key-value
                 store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1544--1556",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461543",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461543",
  abstract =     "Key-value stores (KVSs) have found wide application in
                 modern software systems. For persistence, their data
                 resides in slow secondary storage, which requires KVSs
                 to employ various techniques to increase their read and
                 write performance from and to the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zeighami:2021:ESC,
  author =       "Sepanta Zeighami and Cyrus Shahabi and John Krumm",
  title =        "Estimating spread of contact-based contagions in a
                 population through sub-sampling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1557--1569",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461544",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461544",
  abstract =     "Various phenomena such as viruses, gossips, and
                 physical objects (e.g., packages and marketing
                 pamphlets) can be spread through physical contacts. The
                 spread depends on how people move, i.e., their mobility
                 patterns. In practice, mobility patterns of an
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Herodotou:2021:TTS,
  author =       "Herodotos Herodotou and Elena Kakoulli",
  title =        "{Trident}: task scheduling over tiered storage systems
                 in big data platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1570--1582",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461545",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461545",
  abstract =     "The recent advancements in storage technologies have
                 popularized the use of tiered storage systems in
                 data-intensive compute clusters. The Hadoop Distributed
                 File System (HDFS), for example, now supports storing
                 data in memory, SSDs, and HDDs, while \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cong:2021:CCE,
  author =       "Zicun Cong and Lingyang Chu and Yu Yang and Jian Pei",
  title =        "Comprehensible counterfactual explanation on
                 {Kolmogorov--Smirnov} test",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1583--1596",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461546",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461546",
  abstract =     "The Kolmogorov--Smirnov (KS) test is popularly used in
                 many applications, such as anomaly detection,
                 astronomy, database security and AI systems. One
                 challenge remained untouched is how we can obtain an
                 explanation on why a test set fails the KS test. In
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2021:ALS,
  author =       "Hongkuan Zhou and Ajitesh Srivastava and Hanqing Zeng
                 and Rajgopal Kannan and Viktor Prasanna",
  title =        "Accelerating large scale real-time {GNN} inference
                 using channel pruning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1597--1605",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461547",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461547",
  abstract =     "Graph Neural Networks (GNNs) are proven to be powerful
                 models to generate node embedding for downstream
                 applications. However, due to the high computation
                 complexity of GNN inference, it is hard to deploy GNNs
                 for large-scale or real-time applications. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Leis:2021:TCO,
  author =       "Viktor Leis and Maximilian Kuschewski",
  title =        "Towards cost-optimal query processing in the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1606--1612",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461549",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461549",
  abstract =     "Public cloud providers offer hundreds of heterogeneous
                 hardware instances. For analytical query processing
                 systems, this presents a major challenge: depending on
                 the hardware configuration, performance and cost may
                 differ by orders of magnitude. We \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gong:2021:AIG,
  author =       "Shufeng Gong and Chao Tian and Qiang Yin and Wenyuan
                 Yu and Yanfeng Zhang and Liang Geng and Song Yu and Ge
                 Yu and Jingren Zhou",
  title =        "Automating incremental graph processing with flexible
                 memoization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1613--1625",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461550",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461550",
  abstract =     "The ever-growing amount of dynamic graph data demands
                 efficient techniques of incremental graph processing.
                 However, incremental graph algorithms are challenging
                 to develop. Existing approaches usually require users
                 to manually design nontrivial \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jepsen:2021:NST,
  author =       "Theo Jepsen and Alberto Lerner and Fernando Pedone and
                 Robert Soul{\'e} and Philippe Cudr{\'e}-Mauroux",
  title =        "In-network support for transaction triaging",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1626--1639",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461551",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461551",
  abstract =     "We introduce Transaction Triaging, a set of techniques
                 that manipulate streams of transaction requests and
                 responses while they travel to and from a database
                 server. Compared to normal transaction streams, the
                 triaged ones execute faster once they reach \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:WRL,
  author =       "Xiaoying Wang and Changbo Qu and Weiyuan Wu and
                 Jiannan Wang and Qingqing Zhou",
  title =        "Are we ready for learned cardinality estimation?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1640--1654",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461552",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461552",
  abstract =     "Cardinality estimation is a fundamental but long
                 unresolved problem in query optimization. Recently,
                 multiple papers from different research groups
                 consistently report that learned models have the
                 potential to replace existing cardinality estimators.
                 In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lemiesz:2021:ADS,
  author =       "Jakub Lemiesz",
  title =        "On the algebra of data sketches",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1655--1667",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461553",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461553",
  abstract =     "We consider the problem of designing a distributed
                 data sketch for scenario in which data stream is
                 observed by many independent network nodes. We require
                 that a sketch apart from being computationally and
                 memory efficient should also be mergeable in a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hou:2021:MPA,
  author =       "Guanhao Hou and Xingguang Chen and Sibo Wang and
                 Zhewei Wei",
  title =        "Massively parallel algorithms for {Personalized
                 PageRank}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1668--1680",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461554",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461554",
  abstract =     "Personalized PageRank (PPR) has wide applications in
                 search engines, social recommendations, community
                 detection, and so on. Nowadays, graphs are becoming
                 massive and many IT companies need to deal with large
                 graphs that cannot be fitted into the memory of most
                 commodity servers. However, most existing
                 state-of-the-art solutions for PPR computation only
                 work for single-machines and are inefficient for the
                 distributed framework since such solutions either (i)
                 result in an excessively large number of communication
                 rounds, or (ii) incur high communication costs in each
                 round.

                 Motivated by this, we present Delta-Push, an efficient
                 framework for single-source and top-$k$ PPR queries in
                 distributed settings. Our goal is to reduce the number
                 of rounds while guaranteeing that the load, i.e., the
                 maximum number of messages an executor sends or
                 receives in a round, can be bounded by the capacity of
                 each executor. We first present a non-trivial
                 combination of a redesigned parallel push algorithm and
                 the Monte-Carlo method to answer single-source PPR
                 queries. The solution uses pre-sampled random walks to
                 reduce the number of rounds for the push algorithm.
                 Theoretical analysis under the Massively Parallel
                 Computing (MPC) model shows that our proposed solution
                 bounds the communication rounds to [EQUATION] under a
                 load of O(m/p), where m is the number of edges of the
                 input graph, p is the number of executors, and $
                 \epsilon $ is a user-defined error parameter. In the
                 meantime, as the number of executors increases to $ p'
                 = \gamma \cdot p$, the load constraint can be relaxed
                 since each executor can hold $ O(\gamma \cdot m / p')$
                 messages with invariant local memory. In such
                 scenarios, multiple queries can be processed in batches
                 simultaneously. We show that with a load of $ O(\gamma
                 \cdot m / p')$, our Delta-Push can process $ \gamma $
                 queries in a batch with [EQUATION] rounds, while other
                 baseline solutions still keep the same round cost for
                 each batch. We further present a new top-$k$ algorithm
                 that is friendly to the distributed framework and
                 reduces the number of rounds required in practice.
                 Extensive experiments show that our proposed solution
                 is more efficient than alternatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schleich:2021:GQC,
  author =       "Maximilian Schleich and Zixuan Geng and Yihong Zhang
                 and Dan Suciu",
  title =        "{GeCo}: quality counterfactual explanations in real
                 time",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1681--1693",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3461555",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3461555",
  abstract =     "Machine learning is increasingly applied in
                 high-stakes decision making that directly affect
                 people's lives, and this leads to an increased demand
                 for systems to explain their decisions. Explanations
                 often take the form of counterfactuals, which
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Salazar:2021:AFE,
  author =       "Ricardo Salazar and Felix Neutatz and Ziawasch
                 Abedjan",
  title =        "Automated feature engineering for algorithmic
                 fairness",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "9",
  pages =        "1694--1702",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3461535.3463474",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 23 06:39:32 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3461535.3463474",
  abstract =     "One of the fundamental problems of machine ethics is
                 to avoid the perpetuation and amplification of
                 discrimination through machine learning applications.
                 In particular, it is desired to exclude the influence
                 of attributes with sensitive information, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Addanki:2021:HDR,
  author =       "Raghavendra Addanki and Sainyam Galhotra and Barna
                 Saha",
  title =        "How to design robust algorithms using noisy comparison
                 oracle",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1703--1716",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467862",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467862",
  abstract =     "Metric based comparison operations such as finding
                 maximum, nearest and farthest neighbor are fundamental
                 to studying various clustering techniques such as
                 $k$-center clustering and agglomerative hierarchical
                 clustering. These techniques crucially rely on
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Boniol:2021:SSS,
  author =       "Paul Boniol and John Paparrizos and Themis Palpanas
                 and Michael J. Franklin",
  title =        "{SAND}: streaming subsequence anomaly detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1717--1729",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467863",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467863",
  abstract =     "With the increasing demand for real-time analytics and
                 decision making, anomaly detection methods need to
                 operate over streams of values and handle drifts in
                 data distribution. Unfortunately, existing approaches
                 have severe limitations: they either \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xiao:2021:OFU,
  author =       "Yingtai Xiao and Zeyu Ding and Yuxin Wang and Danfeng
                 Zhang and Daniel Kifer",
  title =        "Optimizing fitness-for-use of differentially private
                 linear queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1730--1742",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467864",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467864",
  abstract =     "In practice, differentially private data releases are
                 designed to support a variety of applications. A data
                 release is fit for use if it meets target accuracy
                 requirements for each application. In this paper, we
                 consider the problem of answering linear \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cao:2021:CED,
  author =       "Xinle Cao and Jian Liu and Hao Lu and Kui Ren",
  title =        "Cryptanalysis of an encrypted database in {SIGMOD
                 '14}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1743--1755",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467865",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467865",
  abstract =     "Encrypted database is an innovative technology
                 proposed to solve the data confidentiality issue in
                 cloud-based DB systems. It allows a data owner to
                 encrypt its database before uploading it to the service
                 provider; and it allows the service provider to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jin:2021:USM,
  author =       "Tianyuan Jin and Yu Yang and Renchi Yang and Jieming
                 Shi and Keke Huang and Xiaokui Xiao",
  title =        "Unconstrained submodular maximization with modular
                 costs: tight approximation and application to profit
                 maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1756--1768",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467866",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467866",
  abstract =     "Given a set V, the problem of unconstrained submodular
                 maximization with modular costs (USM-MC) asks for a
                 subset $ S \subseteq V $ that maximizes $ f(S) -
                 c(S) $, where $f$ is a non-negative, monotone, and
                 submodular function that gauges the utility of S, and c
                 is a non-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:DDL,
  author =       "Yuhao Zhang and Frank McQuillan and Nandish Jayaram
                 and Nikhil Kak and Ekta Khanna and Orhan Kislal and
                 Domino Valdano and Arun Kumar",
  title =        "Distributed deep learning on data systems: a
                 comparative analysis of approaches",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1769--1782",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467867",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467867",
  abstract =     "Deep learning (DL) is growing in popularity for many
                 data analytics applications, including among
                 enterprises. Large business-critical datasets in such
                 settings typically reside in RDBMSs or other data
                 systems. The DB community has long aimed to bring
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sheng:2021:PSM,
  author =       "Siyuan Sheng and Qun Huang and Sa Wang and Yungang
                 Bao",
  title =        "{PR}-sketch: monitoring per-key aggregation of
                 streaming data with nearly full accuracy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1783--1796",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467868",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467868",
  abstract =     "Computing per-key aggregation is indispensable in
                 streaming data analysis formulated as two phases, an
                 update phase and a recovery phase. As the size and
                 speed of data streams rise, accurate per-key
                 information is useful in many applications like
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Koutsoukos:2021:TAG,
  author =       "Dimitrios Koutsoukos and Supun Nakandala and
                 Konstantinos Karanasos and Karla Saur and Gustavo
                 Alonso and Matteo Interlandi",
  title =        "Tensors: an abstraction for general data processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1797--1804",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467869",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467869",
  abstract =     "Deep Learning (DL) has created a growing demand for
                 simpler ways to develop complex models and efficient
                 ways to execute them. Thus, a significant effort has
                 gone into frameworks like PyTorch or TensorFlow to
                 support a variety of DL models and run \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pujol:2021:BSM,
  author =       "David Pujol and Yikai Wu and Brandon Fain and Ashwin
                 Machanavajjhala",
  title =        "Budget sharing for multi-analyst differential
                 privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1805--1817",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467870",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467870",
  abstract =     "Large organizations that collect data about
                 populations (like the US Census Bureau) release summary
                 statistics that are used by multiple stakeholders for
                 resource allocation and policy making problems. These
                 organizations are also legally required to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Poepsel-Lemaitre:2021:LDS,
  author =       "Rudi Poepsel-Lemaitre and Martin Kiefer and Joscha von
                 Hein and Jorge-Arnulfo Quian{\'e}-Ruiz and Volker
                 Markl",
  title =        "In the land of data streams where synopses are
                 missing, one framework to bring them all",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1818--1831",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467871",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467871",
  abstract =     "In pursuit of real-time data analysis, approximate
                 summarization structures, i.e., synopses, have gained
                 importance over the years. However, existing stream
                 processing systems, such as Flink, Spark, and Storm, do
                 not support synopses as first class \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:DAI,
  author =       "Yifan Li and Xiaohui Yu and Nick Koudas",
  title =        "Data acquisition for improving machine learning
                 models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1832--1844",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467872",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467872",
  abstract =     "The vast advances in Machine Learning (ML) over the
                 last ten years have been powered by the availability of
                 suitably prepared data for training purposes. The
                 future of ML-enabled enterprise hinges on data. As
                 such, there is already a vibrant market \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2021:EAR,
  author =       "Xiaoshuang Chen and Kai Wang and Xuemin Lin and Wenjie
                 Zhang and Lu Qin and Ying Zhang",
  title =        "Efficiently answering reachability and path queries on
                 temporal bipartite graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1845--1858",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467873",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467873",
  abstract =     "Bipartite graphs are naturally used to model
                 relationships between two different types of entities,
                 such as people-location, author-paper, and
                 customer-product. When modeling real-world applications
                 like disease outbreaks, edges are often enriched with
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ciaccia:2021:PQT,
  author =       "Paolo Ciaccia and Davide Martinenghi and Riccardo
                 Torlone",
  title =        "Preference queries over taxonomic domains",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1859--1871",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467874",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467874",
  abstract =     "When composing multiple preferences characterizing the
                 most suitable results for a user, several issues may
                 arise. Indeed, preferences can be partially
                 contradictory, suffer from a mismatch with the level of
                 detail of the actual data, and even lack \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yan:2021:RDL,
  author =       "Baoyue Yan and Xuntao Cheng and Bo Jiang and Shibin
                 Chen and Canfang Shang and Jianying Wang and Gui Huang
                 and Xinjun Yang and Wei Cao and Feifei Li",
  title =        "Revisiting the design of {LSM}-tree Based {OLTP}
                 storage engine with persistent memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1872--1885",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467875",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467875",
  abstract =     "The recent byte-addressable and large-capacity
                 commercialized persistent memory (PM) is promising to
                 drive database as a service (DBaaS) into unchartered
                 territories. This paper investigates how to leverage
                 PMs to revisit the conventional LSM-tree based
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ge:2021:KCA,
  author =       "Chang Ge and Shubhankar Mohapatra and Xi He and Ihab
                 F. Ilyas",
  title =        "{Kamino}: constraint-aware differentially private data
                 synthesis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1886--1899",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467876",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467876",
  abstract =     "Organizations are increasingly relying on data to
                 support decisions. When data contains private and
                 sensitive information, the data owner often desires to
                 publish a synthetic database instance that is similarly
                 useful as the true data, while ensuring \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:TCE,
  author =       "Yingqiang Zhang and Chaoyi Ruan and Cheng Li and
                 Xinjun Yang and Wei Cao and Feifei Li and Bo Wang and
                 Jing Fang and Yuhui Wang and Jingze Huo and Chao Bi",
  title =        "Towards cost-effective and elastic cloud database
                 deployment via memory disaggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1900--1912",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467877",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467877",
  abstract =     "It is challenging for cloud-native relational
                 databases to meet the ever-increasing needs of scaling
                 compute and memory resources independently and
                 elastically. The recent emergence of memory
                 disaggregation architecture, relying on high-speed RDMA
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Peeters:2021:DOF,
  author =       "Ralph Peeters and Christian Bizer",
  title =        "Dual-objective fine-tuning of {BERT} for entity
                 matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "10",
  pages =        "1913--1921",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3467861.3467878",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Oct 27 15:40:22 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3467861.3467878",
  abstract =     "An increasing number of data providers have adopted
                 shared numbering schemes such as GTIN, ISBN, DUNS, or
                 ORCID numbers for identifying entities in the
                 respective domain. This means for data integration that
                 shared identifiers are often available for a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Besta:2021:GEH,
  author =       "Maciej Besta and Zur Vonarburg-Shmaria and Yannick
                 Schaffner and Leonardo Schwarz and Grzegorz Kwasniewski
                 and Lukas Gianinazzi and Jakub Beranek and Kacper Janda
                 and Tobias Holenstein and Sebastian Leisinger and Peter
                 Tatkowski and Esref Ozdemir and Adrian Balla and Marcin
                 Copik and Philipp Lindenberger and Marek Konieczny and
                 Onur Mutlu and Torsten Hoefler",
  title =        "{GraphMineSuite}: enabling high-performance and
                 programmable graph mining algorithms with set algebra",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "1922--1935",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476252",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476252",
  abstract =     "We propose GraphMineSuite (GMS): the first
                 benchmarking suite for graph mining that facilitates
                 evaluating and constructing high-performance graph
                 mining algorithms. First, GMS comes with a benchmark
                 specification based on extensive literature review,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Takenouchi:2021:PES,
  author =       "Keita Takenouchi and Takashi Ishio and Joji Okada and
                 Yuji Sakata",
  title =        "{PATSQL}: efficient synthesis of {SQL} queries from
                 example tables with quick inference of projected
                 columns",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "1937--1949",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476253",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476253",
  abstract =     "SQL is one of the most popular tools for data
                 analysis, and it is now used by an increasing number of
                 users without having expertise in databases. Several
                 studies have proposed programming-by-example approaches
                 to help such non-experts to write correct \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:FFA,
  author =       "Jie Liu and Wenqian Dong and Qingqing Zhou and Dong
                 Li",
  title =        "{Fauce}: fast and accurate deep ensembles with
                 uncertainty for cardinality estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "1950--1963",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476254",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476254",
  abstract =     "Cardinality estimation is a fundamental and critical
                 problem in databases. Recently, many estimators based
                 on deep learning have been proposed to solve this
                 problem and they have achieved promising results.
                 However, these estimators struggle to provide
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:CSE,
  author =       "Mengzhao Wang and Xiaoliang Xu and Qiang Yue and
                 Yuxiang Wang",
  title =        "A comprehensive survey and experimental comparison of
                 graph-based approximate nearest neighbor search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "1964--1978",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476255",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476255",
  abstract =     "Approximate nearest neighbor search (ANNS) constitutes
                 an important operation in a multitude of applications,
                 including recommendation systems, information
                 retrieval, and pattern recognition. In the past decade,
                 graph-based ANNS algorithms have been the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yuan:2021:TPP,
  author =       "Zifeng Yuan and Huey Eng Chua and Sourav S. Bhowmick
                 and Zekun Ye and Wook-Shin Han and Byron Choi",
  title =        "Towards plug-and-play visual graph query interfaces:
                 data-driven selection of canned patterns for large
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "1979--1991",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476256",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476256",
  abstract =     "Canned patterns (i.e., small subgraph patterns) in
                 visual graph query interfaces (a.k.a GUI) facilitate
                 efficient query formulation by enabling
                 pattern-at-a-time construction mode. However, existing
                 GUIs for querying large networks either do not expose
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2021:TMG,
  author =       "Shixuan Sun and Yuhang Chen and Shengliang Lu and
                 Bingsheng He and Yuchen Li",
  title =        "{ThunderRW}: an in-memory graph random walk engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "1992--2005",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476257",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476257",
  abstract =     "As random walk is a powerful tool in many graph
                 processing, mining and learning applications, this
                 paper proposes an efficient in-memory random walk
                 engine named ThunderRW. Compared with existing parallel
                 systems on improving the performance of a single
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dong:2021:BCC,
  author =       "Zheng Dong and Xin Huang and Guorui Yuan and Hengshu
                 Zhu and Hui Xiong",
  title =        "Butterfly-core community search over labeled graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2006--2018",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476258",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476258",
  abstract =     "Community search aims at finding densely connected
                 subgraphs for query vertices in a graph. While this
                 task has been studied widely in the literature, most of
                 the existing works only focus on finding homogeneous
                 communities rather than heterogeneous \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Negi:2021:FLL,
  author =       "Parimarjan Negi and Ryan Marcus and Andreas Kipf and
                 Hongzi Mao and Nesime Tatbul and Tim Kraska and
                 Mohammad Alizadeh",
  title =        "{Flow-loss}: learning cardinality estimates that
                 matter",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2019--2032",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476259",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476259",
  abstract =     "Recently there has been significant interest in using
                 machine learning to improve the accuracy of cardinality
                 estimation. This work has focused on improving average
                 estimation error, but not all estimates matter equally
                 for downstream tasks like query \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yu:2021:QHK,
  author =       "Michael Yu and Dong Wen and Lu Qin and Ying Zhang and
                 Wenjie Zhang and Xuemin Lin",
  title =        "On querying historical $k$-cores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2033--2045",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476260",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476260",
  abstract =     "Many real-world relationships between entities can be
                 modeled as temporal graphs, where each edge is
                 associated with a timestamp or a time interval
                 representing its occurrence. K-core is a fundamental
                 model used to capture cohesive subgraphs in a simple
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cormode:2021:FEU,
  author =       "Graham Cormode and Samuel Maddock and Carsten Maple",
  title =        "Frequency estimation under local differential
                 privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2046--2058",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476261",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476261",
  abstract =     "Private collection of statistics from a large
                 distributed population is an important problem, and has
                 led to large scale deployments from several leading
                 technology companies. The dominant approach requires
                 each user to randomly perturb their input, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zogaj:2021:DML,
  author =       "Fatjon Zogaj and Jos{\'e} Pablo Cambronero and Martin
                 C. Rinard and J{\"u}rgen Cito",
  title =        "Doing more with less: characterizing dataset
                 downsampling for {AutoML}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2059--2072",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476262",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476262",
  abstract =     "Automated machine learning (AutoML) promises to
                 democratize machine learning by automatically
                 generating machine learning pipelines with little to no
                 user intervention. Typically, a search procedure is
                 used to repeatedly generate and validate candidate
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:LBE,
  author =       "Yifan Li and Xiaohui Yu and Nick Koudas",
  title =        "{LES 3}: learning-based exact set similarity search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2073--2086",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476263",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476263",
  abstract =     "Set similarity search is a problem of central interest
                 to a wide variety of applications such as data cleaning
                 and web search. Past approaches on set similarity
                 search utilize either heavy indexing structures,
                 incurring large search costs or indexes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Min:2021:LGC,
  author =       "Seung Won Min and Kun Wu and Sitao Huang and Mert
                 Hidayetoglu and Jinjun Xiong and Eiman Ebrahimi and
                 Deming Chen and Wen-mei Hwu",
  title =        "Large graph convolutional network training with
                 {GPU}-oriented data communication architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2087--2100",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476264",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476264",
  abstract =     "Graph Convolutional Networks (GCNs) are increasingly
                 adopted in large-scale graph-based recommender systems.
                 Training GCN requires the minibatch generator
                 traversing graphs and sampling the sparsely located
                 neighboring nodes to obtain their features. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2021:FHP,
  author =       "Yifei Yang and Matt Youill and Matthew Woicik and
                 Yizhou Liu and Xiangyao Yu and Marco Serafini and
                 Ashraf Aboulnaga and Michael Stonebraker",
  title =        "{FlexPushdownDB}: hybrid pushdown and caching in a
                 cloud {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2101--2113",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476265",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476265",
  abstract =     "Modern cloud databases adopt a storage-disaggregation
                 architecture that separates the management of
                 computation and storage. A major bottleneck in such an
                 architecture is the network connecting the computation
                 and storage layers. Two solutions have been \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2021:AMA,
  author =       "Zhiwei Chen and Shaoxu Song and Ziheng Wei and Jingyun
                 Fang and Jiang Long",
  title =        "Approximating median absolute deviation with bounded
                 error",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2114--2126",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476266",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476266",
  abstract =     "The median absolute deviation (MAD) is a statistic
                 measuring the variability of a set of quantitative
                 elements. It is known to be more robust to outliers
                 than the standard deviation (SD), and thereby widely
                 used in outlier detection. Computing the exact
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:EEG,
  author =       "Mengxuan Zhang and Lei Li and Xiaofang Zhou",
  title =        "An experimental evaluation and guideline for path
                 finding in weighted dynamic network",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2127--2140",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476267",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476267",
  abstract =     "Shortest path computation is a building block of
                 various network applications. Since real-life networks
                 evolve as time passes, the Dynamic Shortest Path (DSP)
                 problem has drawn lots of attention in recent years.
                 However, as DSP has many factors related \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vandevoort:2021:RAR,
  author =       "Brecht Vandevoort and Bas Ketsman and Christoph Koch
                 and Frank Neven",
  title =        "Robustness against read committed for transaction
                 templates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2141--2153",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476268",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476268",
  abstract =     "The isolation level Multiversion Read Committed (RC),
                 offered by many database systems, is known to trade
                 consistency for increased transaction throughput.
                 Sometimes, transaction workloads can be safely executed
                 under RC obtaining the perfect isolation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:LLC,
  author =       "Huayi Zhang and Lei Cao and Samuel Madden and Elke
                 Rundensteiner",
  title =        "{LANCET}: labeling complex data at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2154--2166",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476269",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476269",
  abstract =     "Cutting-edge machine learning techniques often require
                 millions of labeled data objects to train a robust
                 model. Because relying on humans to supply such a huge
                 number of labels is rarely practical, automated methods
                 for label generation are needed. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:VSE,
  author =       "Yang Li and Yu Shen and Wentao Zhang and Jiawei Jiang
                 and Bolin Ding and Yaliang Li and Jingren Zhou and Zhi
                 Yang and Wentao Wu and Ce Zhang and Bin Cui",
  title =        "{VolcanoML}: speeding up end-to-end {AutoML} via
                 scalable search space decomposition",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2167--2176",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476270",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476270",
  abstract =     "End-to-end AutoML has attracted intensive interests
                 from both academia and industry, which automatically
                 searches for ML pipelines in a space induced by feature
                 engineering, algorithm/model selection, and
                 hyper-parameter tuning. Existing AutoML systems,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cheng:2021:QTF,
  author =       "Peng Cheng and Jiabao Jin and Lei Chen and Xuemin Lin
                 and Libin Zheng",
  title =        "A queueing-theoretic framework for vehicle dispatching
                 in dynamic car-hailing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2177--2189",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476271",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476271",
  abstract =     "With the rapid development of smart mobile devices,
                 the car-hailing platforms (e.g., Uber or Lyft) have
                 attracted much attention from the academia and the
                 industry. In this paper, we consider a dynamic
                 car-hailing problem, namely maximum revenue vehicle
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cai:2021:DSD,
  author =       "Kuntai Cai and Xiaoyu Lei and Jianxin Wei and Xiaokui
                 Xiao",
  title =        "Data synthesis via differentially private {Markov}
                 random fields",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2190--2202",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476272",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476272",
  abstract =     "This paper studies the synthesis of high-dimensional
                 datasets with differential privacy (DP). The
                 state-of-the-art solution addresses this problem by
                 first generating a set M of noisy low-dimensional
                 marginals of the input data D, and then use them to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Whittaker:2021:SRS,
  author =       "Michael Whittaker and Ailidani Ailijiang and Aleksey
                 Charapko and Murat Demirbas and Neil Giridharan and
                 Joseph M. Hellerstein and Heidi Howard and Ion Stoica
                 and Adriana Szekeres",
  title =        "Scaling replicated state machines with
                 compartmentalization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2203--2215",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476273",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476273",
  abstract =     "State machine replication protocols, like MultiPaxos
                 and Raft, are a critical component of many distributed
                 systems and databases. However, these protocols offer
                 relatively low throughput due to several bottlenecked
                 components. Numerous existing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sarkar:2021:CAL,
  author =       "Subhadeep Sarkar and Dimitris Staratzis and Zichen Zhu
                 and Manos Athanassoulis",
  title =        "Constructing and analyzing the {LSM} compaction design
                 space",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2216--2229",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476274",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476274",
  abstract =     "Log-structured merge (LSM) trees offer efficient
                 ingestion by appending incoming data, and thus, are
                 widely used as the storage layer of production NoSQL
                 data stores. To enable competitive read performance,
                 LSM-trees periodically re-organize data to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hellings:2021:BSB,
  author =       "Jelle Hellings and Mohammad Sadoghi",
  title =        "{ByShard}: sharding in a {Byzantine} environment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2230--2243",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476275",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476275",
  abstract =     "The emergence of blockchains has fueled the
                 development of resilient systems that can deal with
                 Byzantine failures due to crashes, bugs, or even
                 malicious behavior. Recently, we have also seen the
                 exploration of sharding in these resilient systems,
                 this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ertl:2021:SFG,
  author =       "Otmar Ertl",
  title =        "{SetSketch}: filling the gap between {MinHash} and
                 {HyperLogLog}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2244--2257",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476276",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476276",
  abstract =     "MinHash and HyperLogLog are sketching algorithms that
                 have become indispensable for set summaries in big data
                 applications. While HyperLogLog allows counting
                 different elements with very little space, MinHash is
                 suitable for the fast comparison of sets \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bao:2021:CEM,
  author =       "Ergute Bao and Yin Yang and Xiaokui Xiao and Bolin
                 Ding",
  title =        "{CGM}: an enhanced mechanism for streaming data
                 collection with local differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2258--2270",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476277",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476277",
  abstract =     "Local differential privacy (LDP) is a well-established
                 privacy protection scheme for collecting sensitive
                 data, which has been integrated into major platforms
                 such as iOS, Chrome, and Windows. The main idea is that
                 each individual randomly perturbs her \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Leo:2021:ETA,
  author =       "Dean {De Leo} and Per Fuchs and Peter Boncz",
  title =        "Errata for {``Teseo and the analysis of structural
                 dynamic graphs'': (PVLDB {\bf 14}(6):1053--1066)}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2271--2272",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476278",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See \cite{DeLeo:2021:TAS}.",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476278",
  abstract =     "In our paper [4], we experimentally evaluated our
                 work, Teseo, together with five other systems under the
                 LDBC Graphalytics benchmark [6]. We developed and
                 publicly released [2] an ad-hoc driver for the purpose.
                 Since the time the paper was published, a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Musleh:2021:QMB,
  author =       "Mashaal Musleh and Sofiane Abbar and Rade Stanojevic
                 and Mohamed Mokbel",
  title =        "{QARTA}: an {ML}-based system for accurate map
                 services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2273--2282",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476279",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476279",
  abstract =     "Maps services are ubiquitous in widely used
                 applications including navigation systems, ride
                 sharing, and items/food delivery. Though there are
                 plenty of efforts to support such services through
                 designing more efficient algorithms, we believe that
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cunningham:2021:RWT,
  author =       "Teddy Cunningham and Graham Cormode and Hakan
                 Ferhatosmanoglu and Divesh Srivastava",
  title =        "Real-world trajectory sharing with local differential
                 privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2283--2295",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476280",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476280",
  abstract =     "Sharing trajectories is beneficial for many real-world
                 applications, such as managing disease spread through
                 contact tracing and tailoring public services to a
                 population's travel patterns. However, public concern
                 over privacy and data protection has \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sinthong:2021:PRQ,
  author =       "Phanwadee Sinthong and Michael J. Carey",
  title =        "{PolyFrame}: a retargetable query-based approach to
                 scaling dataframes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2296--2304",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476281",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 06:21:49 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476281",
  abstract =     "In the last few years, the field of data science has
                 been growing rapidly as various businesses have adopted
                 statistical and machine learning techniques to empower
                 their decision-making and applications. Scaling data
                 analyses to large volumes of data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shi:2021:SCD,
  author =       "Jessica Shi and Laxman Dhulipala and David Eisenstat
                 and Jakub Lacki and Vahab Mirrokni",
  title =        "Scalable community detection via parallel correlation
                 clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2305--2313",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476282",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476282",
  abstract =     "Graph clustering and community detection are central
                 problems in modern data mining. The increasing need for
                 analyzing billion-scale data calls for faster and more
                 scalable algorithms for these problems. There are
                 certain trade-offs between the quality \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xu:2021:SSB,
  author =       "Cheng Xu and Ce Zhang and Jianliang Xu and Jian Pei",
  title =        "{SlimChain}: scaling blockchain transactions through
                 off-chain storage and parallel processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2314--2326",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476283",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476283",
  abstract =     "Blockchain technology has emerged as the cornerstone
                 of many decentralized applications operating among
                 otherwise untrusted peers. However, it is well known
                 that existing blockchain systems do not scale well.
                 Transactions are often executed and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:TOG,
  author =       "Side Li and Arun Kumar",
  title =        "Towards an optimized {GROUP BY} abstraction for
                 large-scale machine learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2327--2340",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476284",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476284",
  abstract =     "Many applications that use large-scale machine
                 learning (ML) increasingly prefer different models for
                 subgroups (e.g., countries) to improve accuracy,
                 fairness, or other desiderata. We call this emerging
                 popular practice learning over groups, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kang:2021:AAA,
  author =       "Daniel Kang and John Guibas and Peter Bailis and
                 Tatsunori Hashimoto and Yi Sun and Matei Zaharia",
  title =        "Accelerating approximate aggregation queries with
                 expensive predicates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2341--2354",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476285",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476285",
  abstract =     "Researchers and industry analysts are increasingly
                 interested in computing aggregation queries over large,
                 unstructured datasets with selective predicates that
                 are computed using expensive deep neural networks
                 (DNNs). As these DNNs are expensive and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schmidt:2021:FDA,
  author =       "Tobias Schmidt and Maximilian Bandle and Jana Giceva",
  title =        "A four-dimensional analysis of partitioned approximate
                 filters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2355--2368",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476286",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476286",
  abstract =     "With today's data deluge, approximate filters are
                 particularly attractive to avoid expensive operations
                 like remote data/disk accesses. Among the many filter
                 variants available, it is non-trivial to find the most
                 suitable one and its optimal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chiosa:2021:SOP,
  author =       "Monica Chiosa and Thomas B. Preu{\ss}er and Gustavo
                 Alonso",
  title =        "{SKT}: a one-pass multi-sketch data analytics
                 accelerator",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2369--2382",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476287",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476287",
  abstract =     "Data analysts often need to characterize a data stream
                 as a first step to its further processing. Some of the
                 initial insights to be gained include, e.g., the
                 cardinality of the data set and its frequency
                 distribution. Such information is typically \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fent:2021:PAG,
  author =       "Philipp Fent and Thomas Neumann",
  title =        "A practical approach to groupjoin and nested
                 aggregates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2383--2396",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476288",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476288",
  abstract =     "Groupjoins, the combined execution of a join and a
                 subsequent group by, are common in analytical queries,
                 and occur in about 1/8 of the queries in TPC-H and
                 TPC-DS. While they were originally invented to improve
                 performance, efficient parallel execution \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wei:2021:RVQ,
  author =       "Ziyun Wei and Immanuel Trummer and Connor Anderson",
  title =        "Robust voice querying with {MUVE}: optimally
                 visualizing results of phonetically similar queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2397--2409",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476289",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476289",
  abstract =     "Recently proposed voice query interfaces translate
                 voice input into SQL queries. Unreliable speech
                 recognition on top of the intrinsic challenges of
                 text-to-SQL translation makes it hard to reliably
                 interpret user input. We present MUVE (Multiplots for
                 \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2021:CCF,
  author =       "Yinjun Wu and James Weimer and Susan B. Davidson",
  title =        "{CHEF}: a cheap and fast pipeline for iteratively
                 cleaning label uncertainties",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2410--2418",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476290",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476290",
  abstract =     "High-quality labels are expensive to obtain for many
                 machine learning tasks, such as medical image
                 classification tasks. Therefore, probabilistic (weak)
                 labels produced by weak supervision tools are used to
                 seed a process in which influential samples \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Siddiqui:2021:CAG,
  author =       "Tarique Siddiqui and Surajit Chaudhuri and Vivek
                 Narasayya",
  title =        "{COMPARE}: accelerating groupwise comparison in
                 relational databases for data analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2419--2431",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476291",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476291",
  abstract =     "Data analysis often involves comparing subsets of data
                 across many dimensions for finding unusual trends and
                 patterns. While the comparison between subsets of data
                 can be expressed using SQL, they tend to be complex to
                 write, and suffer from poor \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Durner:2021:CUC,
  author =       "Dominik Durner and Badrish Chandramouli and Yinan Li",
  title =        "{Crystal}: a unified cache storage system for
                 analytical databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2432--2444",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476292",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476292",
  abstract =     "Cloud analytical databases employ a disaggregated
                 storage model, where the elastic compute layer accesses
                 data persisted on remote cloud storage in
                 block-oriented columnar formats. Given the high latency
                 and low bandwidth to remote storage and the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cetorelli:2021:SEP,
  author =       "Valerio Cetorelli and Paolo Atzeni and Valter
                 Crescenzi and Franco Milicchio",
  title =        "The smallest extraction problem",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2445--2458",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476293",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476293",
  abstract =     "We introduce landmark grammars, a new family of
                 context-free grammars aimed at describing the HTML
                 source code of pages published by large and templated
                 websites and therefore at effectively tackling Web data
                 extraction problems. Indeed, they address \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Thirumuruganathan:2021:DLB,
  author =       "Saravanan Thirumuruganathan and Han Li and Nan Tang
                 and Mourad Ouzzani and Yash Govind and Derek Paulsen
                 and Glenn Fung and AnHai Doan",
  title =        "Deep learning for blocking in entity matching: a
                 design space exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2459--2472",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476294",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476294",
  abstract =     "Entity matching (EM) finds data instances that refer
                 to the same real-world entity. Most EM solutions
                 perform blocking then matching. Many works have applied
                 deep learning (DL) to matching, but far fewer works
                 have applied DL to blocking. These blocking \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:GID,
  author =       "Wentao Zhang and Zhi Yang and Yexin Wang and Yu Shen
                 and Yang Li and Liang Wang and Bin Cui",
  title =        "{GRAIN}: improving data efficiency of {\em gra\/}ph
                 neural networks via diversified {\em in\/}fluence
                 maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2473--2482",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476295",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476295",
  abstract =     "Data selection methods, such as active learning and
                 core-set selection, are useful tools for improving the
                 data efficiency of deep learning models on large-scale
                 datasets. However, recent deep learning models have
                 moved forward from independent and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bandle:2021:DTM,
  author =       "Maximilian Bandle and Jana Giceva",
  title =        "Database technology for the masses: sub-operators as
                 first-class entities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2483--2490",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476296",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476296",
  abstract =     "A wealth of technology has evolved around relational
                 databases over decades that has been successfully tried
                 and tested in many settings and use cases. Yet, the
                 majority of it remains overlooked in the pursuit of
                 performance (e.g., NoSQL) or new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gupta:2021:CSL,
  author =       "Pranjal Gupta and Amine Mhedhbi and Semih Salihoglu",
  title =        "Columnar storage and list-based processing for graph
                 database management systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2491--2504",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476297",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476297",
  abstract =     "We revisit column-oriented storage and query
                 processing techniques in the context of contemporary
                 graph database management systems (GDBMSs). Similar to
                 column-oriented RDBMSs, GDBMSs support read-heavy
                 analytical workloads that however have \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2021:PLB,
  author =       "Yiwen Zhu and Matteo Interlandi and Abhishek Roy and
                 Krishnadhan Das and Hiren Patel and Malay Bag and
                 Hitesh Sharma and Alekh Jindal",
  title =        "{Phoebe}: a learning-based checkpoint optimizer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2505--2518",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476298",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476298",
  abstract =     "Easy-to-use programming interfaces paired with
                 cloud-scale processing engines have enabled big data
                 system users to author arbitrarily complex analytical
                 jobs over massive volumes of data. However, as the
                 complexity and scale of analytical jobs increase,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@article{Nargesian:2021:TDS,
  author =       {Fatemeh Nargesian and Abolfazl Asudeh and H. V.
                 Jagadish},
  title =        {Tailoring data source distributions for fairness-aware
                 data integration},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2519--2532},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476299},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476299},
  abstract =     {Data scientists often develop data sets for analysis
                 by drawing upon sources of data available to them. A
                 major challenge is to ensure that the data set used for
                 analysis has an appropriate representation of relevant
                 (demographic) groups: it meets \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Bansal:2021:MVI,
  author =       {Parikshit Bansal and Prathamesh Deshpande and Sunita
                 Sarawagi},
  title =        {Missing value imputation on multidimensional time
                 series},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2533--2545},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476300},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476300},
  abstract =     {We present DeepMVI, a deep learning method for missing
                 value imputation in multidimensional time-series
                 datasets. Missing values are commonplace in decision
                 support platforms that aggregate data over long time
                 stretches from disparate sources, whereas \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Rezig:2021:HSD,
  author =       {El Kindi Rezig and Mourad Ouzzani and Walid G. Aref
                 and Ahmed K. Elmagarmid and Ahmed R. Mahmood and
                 Michael Stonebraker},
  title =        {{Horizon}: scalable dependency-driven data cleaning},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2546--2554},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476301},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476301},
  abstract =     {A large class of data repair algorithms rely on
                 integrity constraints to detect and repair errors. A
                 well-studied class of constraints is Functional
                 Dependencies (FDs, for short). Although there has been
                 an increased interest in developing general data
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Shaowang:2021:DDS,
  author =       {Ted Shaowang and Nilesh Jain and Dennis D. Matthews
                 and Sanjay Krishnan},
  title =        {Declarative data serving: the future of machine
                 learning inference on the edge},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2555--2562},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476302},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476302},
  abstract =     {Recent advances in computer architecture and
                 networking have ushered in a new age of edge computing,
                 where computation is placed close to the point of data
                 collection to facilitate low-latency decision making.
                 As the complexity of such deployments grow \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@Article{Yang:2021:APS,
  author =       "Junwen Yang and Yeye He and Surajit Chaudhuri",
  title =        "{Auto-pipeline}: synthesizing complex data pipelines
                 by-target using reinforcement learning and search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "11",
  pages =        "2563--2575",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476249.3476303",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 18:05:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476249.3476303",
  abstract =     "Recent work has made significant progress in helping
                 users to automate single data preparation steps, such
                 as string-transformations and table-manipulation
                 operators (e.g., Join, GroupBy, Pivot, etc.). We in
                 this work propose to automate multiple such \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@article{Lockhart:2021:EIQ,
  author =       {Brandon Lockhart and Jinglin Peng and Weiyuan Wu and
                 Jiannan Wang and Eugene Wu},
  title =        {Explaining inference queries with {Bayesian}
                 optimization},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2576--2585},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476304},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476304},
  abstract =     {Obtaining an explanation for an SQL query result can
                 enrich the analysis experience, reveal data errors, and
                 provide deeper insight into the data. Inference query
                 explanation seeks to explain unexpected aggregate query
                 results on inference data; such \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Liu:2021:DBF,
  author =       {Chunwei Liu and Hao Jiang and John Paparrizos and
                 Aaron J. Elmore},
  title =        {Decomposed bounded floats for fast compression and
                 queries},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2586--2598},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476305},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476305},
  abstract =     {Modern data-intensive applications often generate
                 large amounts of low precision float data with a
                 limited range of values. Despite the prevalence of such
                 data, there is a lack of an effective solution to
                 ingest, store, and analyze bounded, low-precision,
                 numeric data. To address this gap, we propose Buff, a
                 new compression technique that uses a decomposed
                 columnar storage and encoding methods to provide
                 effective compression, fast ingestion, and high-speed
                 in-situ adaptive query operators with SIMD support.},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Tziavelis:2021:BEJ,
  author =       {Nikolaos Tziavelis and Wolfgang Gatterbauer and Mirek
                 Riedewald},
  title =        {Beyond equi-joins: ranking, enumeration and
                 factorization},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2599--2612},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476306},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476306},
  abstract =     {We study theta-joins in general and join predicates
                 with conjunctions and disjunctions of inequalities in
                 particular, focusing on ranked enumeration where the
                 answers are returned incrementally in an order dictated
                 by a given ranking function. Our \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Jacob:2021:EBE,
  author =       {Vincent Jacob and Fei Song and Arnaud Stiegler and
                 Bijan Rad and Yanlei Diao and Nesime Tatbul},
  title =        {{Exathlon}: a benchmark for explainable anomaly
                 detection over time series},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2613--2626},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476307},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476307},
  abstract =     {Access to high-quality data repositories and
                 benchmarks have been instrumental in advancing the
                 state of the art in many experimental research domains.
                 While advanced analytics tasks over time series data
                 have been gaining lots of attention, lack of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Kuchnik:2021:PCR,
  author =       {Michael Kuchnik and George Amvrosiadis and Virginia
                 Smith},
  title =        {Progressive compressed records: taking a byte out of
                 deep learning data},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2627--2641},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476308},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476308},
  abstract =     {Deep learning accelerators efficiently train over vast
                 and growing amounts of data, placing a newfound burden
                 on commodity networks and storage devices. A common
                 approach to conserve bandwidth involves resizing or
                 compressing data prior to training. We \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Alsaudi:2021:TFQ,
  author =       {Abdulrahman Alsaudi and Yasser Altowim and Sharad
                 Mehrotra and Yaming Yu},
  title =        {{TQEL}: framework for query-driven linking of top-$k$
                 entities in social media blogs},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {11},
  pages =        {2642--2654},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476249.3476309},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 18:05:40 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476249.3476309},
  abstract =     {Social media analysis over blogs (such as tweets)
                 often requires determining top-k mentions of a certain
                 category (e.g., movies) in a collection (e.g., tweets
                 collected over a given day). Such queries require
                 entity linking (EL) function to be executed \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Chan:2021:KEN,
  author =       {Tsz Nam Chan and Pak Lon Ip and Leong Hou U. and Weng
                 Hou Tong and Shivansh Mittal and Ye Li and Reynold
                 Cheng},
  title =        {{KDV-explorer}: a near real-time kernel density
                 visualization system for spatial analysis},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2655--2658},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476312},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476312},
  abstract =     {Kernel density visualization (KDV) is a commonly used
                 visualization tool for many spatial analysis tasks,
                 including disease outbreak detection, crime hotspot
                 detection, and traffic accident hotspot detection.
                 Although the most popular geographical \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Zhang:2021:RRI,
  author =       {Zhebin Zhang and Dajie Dong and Yuhang Ma and Yilong
                 Ying and Dawei Jiang and Ke Chen and Lidan Shou and
                 Gang Chen},
  title =        {{Refiner}: a reliable incentive-driven federated
                 learning system powered by blockchain},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2659--2662},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476313},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476313},
  abstract =     {Modern mobile applications often produce decentralized
                 data, i.e., a huge amount of privacy-sensitive data
                 distributed over a large number of mobile devices.
                 Techniques for learning models from decentralized data
                 must properly handle two natures of such \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Uotila:2021:MMM,
  author =       {Valter Uotila and Jiaheng Lu and Dieter Gawlick and
                 Zhen Hua Liu and Souripriya Das and Gregory
                 Pogossiants},
  title =        {{MultiCategory}: multi-model query processing meets
                 category theory and functional programming},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2663--2666},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476314},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476314},
  abstract =     {The variety of data is one of the important issues in
                 the era of Big Data. The data are naturally organized
                 in different formats and models, including structured
                 data, semi-structured data, and unstructured data.
                 Prior research has envisioned an \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@Article{Wang:2021:CCQ,
  author =       "Qichen Wang and Chaoqi Zhang and Danish Alsayed and Ke
                 Yi and Bin Wu and Feifei Li and Chaoqun Zhan",
  title =        "{Cquirrel}: continuous query processing over acyclic
                 relational schemas",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2667--2670",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476315",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476315",
  abstract =     "We will demonstrate Cquirrel, a continuous query
                 processing engine built on top of Flink. Cquirrel
                 assumes a relational schema where the foreign-key
                 constraints form a directed acyclic graph, and supports
                 any selection-projection-join-aggregation query
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@article{Mao:2021:DDF,
  author =       {Yuetian Mao and Shuai Yuan and Nan Cui and Tianjiao Du
                 and Beijun Shen and Yuting Chen},
  title =        {{DeFiHap}: detecting and fixing {HiveQL}
                 anti-patterns},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2671--2674},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476316},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476316},
  abstract =     {The emergence of Hive greatly facilitates the
                 management of massive data stored in various places.
                 Meanwhile, data scientists face challenges during
                 HiveQL programming --- they may not use correct and/or
                 efficient HiveQL statements in their programs;
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Helal:2021:DKD,
  author =       {Ahmed Helal and Mossad Helali and Khaled Ammar and
                 Essam Mansour},
  title =        {A demonstration of {KGLac}: a data discovery and
                 enrichment platform for data science},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2675--2678},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476317},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476317},
  abstract =     {Data science growing success relies on knowing where a
                 relevant dataset exists, understanding its impact on a
                 specific task, finding ways to enrich a dataset, and
                 leveraging insights derived from it. With the growth of
                 open data initiatives, data \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Faure-Giovagnoli:2021:AVL,
  author =       {Pierre Faure-Giovagnoli and Marie {Le Guilly} and
                 Jean-Marc Petit and Vasile-Marian Scuturici},
  title =        {{ADESIT}: visualize the limits of your data in a
                 machine learning process},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2679--2682},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476318},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476318},
  abstract =     {Thanks to the numerous machine learning tools
                 available to us nowadays, it is easier than ever to
                 derive a model from a dataset in the frame of a
                 supervised learning problem. However, when this model
                 behaves poorly compared with an expected performance,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Yan:2021:PAM,
  author =       {Yinzhao Yan and Raymond Chi-Wing Wong},
  title =        {Path advisor: a multi-functional campus map tool for
                 shortest path},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2683--2686},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476319},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476319},
  abstract =     {The shortest path in both the two dimensional (2D)
                 plane and the three dimensional (3D) terrain is
                 extensively used both in industry and academia.
                 Although there are some map visualization tools for
                 viewing the shortest path in 2D and 3D views, we find
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Li:2021:IHL,
  author =       {Liangde Li and Supun Nakandala and Arun Kumar},
  title =        {Intermittent human-in-the-loop model selection using
                 {Cerebro}: a demonstration},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2687--2690},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476320},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476320},
  abstract =     {Deep learning (DL) is revolutionizing many fields.
                 However, there is a major bottleneck for the wide
                 adoption of DL: the pain of model selection, which
                 requires exploring a large config space of model
                 architecture and training hyper-parameters before
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Funke:2021:LLC,
  author =       {Henning Funke and Jens Teubner},
  title =        {Low-latency compilation of {SQL} queries to machine
                 code},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2691--2694},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476321},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476321},
  abstract =     {Query compilation has proven to be one of the most
                 efficient query processing techniques. Despite its fast
                 processing speed, the additional compilation times of
                 the technique limit its applicability. This is because
                 the approach is most beneficial only \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Groppe:2021:SDS,
  author =       {Sven Groppe and Rico Klinckenberg and Benjamin
                 Warnke},
  title =        {Sound of databases: sonification of a semantic web
                 database engine},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2695--2698},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476322},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476322},
  abstract =     {Sonifications map data to auditory dimensions and
                 offer a new audible experience to their listeners. We
                 propose a sonification of query processing paired with
                 a corresponding visualization both integrated in a web
                 application. In this demonstration we \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Chen:2021:HHM,
  author =       {Zihao Chen and Zhizhen Xu and Chen Xu and Juan Soto
                 and Volker Markl and Weining Qian and Aoying Zhou},
  title =        {{HyMAC}: a hybrid matrix computation system},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2699--2702},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476323},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476323},
  abstract =     {Distributed matrix computation is common in
                 large-scale data processing and machine learning
                 applications. Iterative-convergent algorithms involving
                 matrix computation share a common property: parameters
                 converge non-uniformly. This property can be \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@article{Xu:2021:GOS,
  author =       {Jingbo Xu and Zhanning Bai and Wenfei Fan and Longbin
                 Lai and Xue Li and Zhao Li and Zhengping Qian and Lei
                 Wang and Lei Wang and Yanyan Wang and Wenyuan Yu and
                 Jingren Zhou},
  title =        {{GraphScope}: a one-stop large graph processing
                 system},
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       {14},
  number =       {12},
  pages =        {2703--2706},
  month =        jul,
  year =         {2021},
  coden =        {????},
  doi =          {https://doi.org/10.14778/3476311.3476324},
  issn =         {2150-8097},
  issn-l =       {2150-8097},
  bibdate =      {Fri Oct 29 16:41:16 MDT 2021},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  url =          {https://dl.acm.org/doi/10.14778/3476311.3476324},
  abstract =     {Due to diverse graph data and algorithms, programming
                 and orchestration of complex computation pipelines have
                 become the major challenges to making use of graph
                 applications for Web-scale data analysis. GraphScope
                 aims to provide a one-stop and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {Proc. VLDB Endowment},
  fjournal =     {Proceedings of the VLDB Endowment},
  journal-url =  {https://dl.acm.org/loi/pvldb},
}

@Article{Renz-Wieland:2021:JMI,
  author =       "Alexander Renz-Wieland and Tobias Drobisch and Zoi
                 Kaoudi and Rainer Gemulla and Volker Markl",
  title =        "Just move it!: dynamic parameter allocation in
                 action",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2707--2710",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476325",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476325",
  abstract =     "Parameter servers (PSs) ease the implementation of
                 distributed machine learning systems, but their
                 performance can fall behind that of single machine
                 baselines due to communication overhead. We demonstrate
                 Lapse, an open source PS with dynamic parameter
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Orogat:2021:CDC,
  author =       "Abdelghny Orogat and Ahmed El-Roby",
  title =        "{CBench}: demonstrating comprehensive evaluation of
                 question answering systems over knowledge graphs
                 through deep analysis of benchmarks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2711--2714",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476326",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476326",
  abstract =     "A plethora of question answering (QA) systems that
                 retrieve answers to natural language questions from
                 knowledge graphs have been developed in recent years.
                 However, choosing a benchmark to accurately assess the
                 quality of a question answering system is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Woltmann:2021:PPM,
  author =       "Lucas Woltmann and Dominik Olwig and Claudio Hartmann
                 and Dirk Habich and Wolfgang Lehner",
  title =        "{PostCENN}: {PostgreSQL} with machine learning models
                 for cardinality estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2715--2718",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476327",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476327",
  abstract =     "In this demo, we present PostCENN, an enhanced
                 PostgreSQL database system with an end-to-end
                 integration of machine learning (ML) models for
                 cardinality estimation. In general, cardinality
                 estimation is a topic with a long history in the
                 database \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:DDU,
  author =       "Jinyang Li and Yuval Moskovitch and H. V. Jagadish",
  title =        "{DENOUNCER}: detection of unfairness in classifiers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2719--2722",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476328",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476328",
  abstract =     "The use of automated data-driven tools for
                 decision-making has gained popularity in recent years.
                 At the same time, the reported cases of algorithmic
                 bias and discrimination increase as well, which in turn
                 lead to an extensive study of algorithmic \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Abbar:2021:DQM,
  author =       "Sofiane Abbar and Rade Stanojevic and Mashaal Musleh
                 and Mohamed ElShrif and Mohamed Mokbel",
  title =        "A demonstration of {QARTA}: an {ML}-based system for
                 accurate map services",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2723--2726",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476329",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476329",
  abstract =     "This demo presents QARTA; an open-source full-fledged
                 system for highly accurate and scalable map services.
                 QARTA employs machine learning techniques to: (a)
                 construct its own highly accurate map in terms of both
                 map topology and edge weights, and (b) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Smith:2021:TTN,
  author =       "Jaclyn Smith and Michael Benedikt and Brandon Moore
                 and Milos Nikolic",
  title =        "{TraNCE}: transforming nested collections
                 efficiently",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2727--2730",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476330",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476330",
  abstract =     "Nested relational query languages have long been seen
                 as an attractive tool for scenarios involving large
                 hierarchical datasets. There has been a resurgence of
                 interest in nested relational languages. One driver has
                 been the affinity of these languages \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Diestelkamper:2021:DMA,
  author =       "Ralf Diestelk{\"a}mper and Seokki Lee and Boris Glavic
                 and Melanie Herschel",
  title =        "Debugging missing answers for {Spark} queries over
                 nested data with {Breadcrumb}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2731--2734",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476331",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476331",
  abstract =     "We present Breadcrumb, a system that aids developers
                 in debugging queries through query-based explanations
                 for missing answers. Given as input a query and an
                 expected, but missing, query result, Breadcrumb
                 identifies operators in the input query that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2021:DPW,
  author =       "Renzhi Wu and Prem Sakala and Peng Li and Xu Chu and
                 Yeye He",
  title =        "Demonstration of {Panda}: a weakly supervised entity
                 matching system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2735--2738",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476332",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476332",
  abstract =     "Entity matching (EM) refers to the problem of
                 identifying tuple pairs in one or more relations that
                 refer to the same real world entities. Supervised
                 machine learning (ML) approaches, and deep learning
                 based approaches in particular, typically achieve
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:ADAb,
  author =       "Jiabin Liu and Fu Zhu and Chengliang Chai and Yuyu Luo
                 and Nan Tang",
  title =        "Automatic data acquisition for deep learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2739--2742",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476333",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476333",
  abstract =     "Deep learning (DL) has widespread applications and has
                 revolutionized many industries. Although automated
                 machine learning (AutoML) can help us away from coding
                 for DL models, the acquisition of lots of high-quality
                 data for model training remains a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2021:DSD,
  author =       "Xuanhe Zhou and Lianyuan Jin and Ji Sun and Xinyang
                 Zhao and Xiang Yu and Jianhua Feng and Shifu Li and
                 Tianqing Wang and Kun Li and Luyang Liu",
  title =        "{DBMind}: a self-driving platform in {openGauss}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2743--2746",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476334",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476334",
  abstract =     "We demonstrate a self-driving system DBMind, which
                 provides three autonomous capabilities in database,
                 including self-monitoring, self-diagnosis and
                 self-optimization. First, self-monitoring judiciously
                 collects database metrics and detects anomalies
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2021:DDE,
  author =       "Qiongqiong Lin and Jiayao Zhang and Jinfei Liu and Kui
                 Ren and Jian Lou and Junxu Liu and Li Xiong and Jian
                 Pei and Jimeng Sun",
  title =        "Demonstration of {Dealer}: an end-to-end model
                 marketplace with differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2747--2750",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476335",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476335",
  abstract =     "Data-driven machine learning (ML) has witnessed great
                 success across a variety of application domains. Since
                 ML model training relies on a large amount of data,
                 there is a growing demand for high-quality data to be
                 collected for ML model training. Data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mu:2021:AAC,
  author =       "Tianyu Mu and Hongzhi Wang and Shenghe Zheng and
                 Shaoqing Zhang and Cheng Liang and Haoyun Tang",
  title =        "{Assassin}: an automatic classification system based
                 on algorithm selection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2751--2754",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476336",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476336",
  abstract =     "The increasing complexity of data analysis tasks makes
                 it dependent on human expertise and challenging for
                 non-experts. One of the major challenges faced in data
                 analysis is the selection of the proper algorithm for
                 given tasks and data sets. Motivated \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cao:2021:AMD,
  author =       "Lei Cao and Dongqing Xiao and Yizhou Yan and Samuel
                 Madden and Guoliang Li",
  title =        "{ATLANTIC}: making database differentially private and
                 faster with accuracy guarantee",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2755--2758",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476337",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476337",
  abstract =     "Differential privacy promises to enable data sharing
                 and general data analytics while protecting individual
                 privacy. Because the private data is often stored in
                 the form of relational database that supports SQL
                 queries, making SQL-based analytics \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xie:2021:DMS,
  author =       "Anze Xie and Anders Carlsson and Jason Mohoney and
                 Roger Waleffe and Shanan Peters and Theodoros
                 Rekatsinas and Shivaram Venkataraman",
  title =        "Demo of {Marius}: a system for large-scale graph
                 embeddings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2759--2762",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476338",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476338",
  abstract =     "Graph embeddings have emerged as the de facto
                 representation for modern machine learning over graph
                 data structures. The goal of graph embedding models is
                 to convert high-dimensional sparse graphs into
                 low-dimensional, dense and continuous vector spaces
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Muller:2021:PPO,
  author =       "Heiko M{\"u}ller and Sonia Castelo and Munaf Qazi and
                 Juliana Freire",
  title =        "From papers to practice: the \pkg{openclean}
                 open-source data cleaning library",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2763--2766",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476339",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476339",
  abstract =     "Data preparation is still a major bottleneck for many
                 data science projects. Even though many sophisticated
                 algorithms and tools have been proposed in the research
                 literature, it is difficult for practitioners to
                 integrate them into their data wrangling \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ge:2021:DAD,
  author =       "Yongming Ge and Vanessa Lin and Maureen Daum and
                 Brandon Haynes and Alvin Cheung and Magdalena
                 Balazinska",
  title =        "Demonstration of {Apperception}: a database management
                 system for geospatial video data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2767--2770",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476340",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476340",
  abstract =     "Many recent video applications---including traffic
                 monitoring, drone analytics, autonomous driving, and
                 virtual reality---require piecing together, combining,
                 and operating over many related video streams. Despite
                 the massive data volumes involved and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Karatzoglidi:2021:AEC,
  author =       "Mary Karatzoglidi and Paraskevas Kerasiotis and Verena
                 Kantere",
  title =        "Automated energy consumption forecasting with
                 {EnForce}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2771--2774",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476341",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476341",
  abstract =     "The need to reduce energy consumption on a global
                 scale has been of high importance during the last
                 years. Research has created methods to make highly
                 accurate forecasts on the energy consumption of
                 buildings and there have been efforts towards the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jang:2021:RWG,
  author =       "Myung-Hwan Jang and Yong-Yeon Jo and Sang-Wook Kim",
  title =        "{RealGraph} web: a graph analysis platform on the
                 web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2775--2778",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476342",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476342",
  abstract =     "In this demo, we present RealGraph$^{Web}$, a
                 web-based platform that provides various kinds of graph
                 analysis services. RealGraph$^{Web}$ is based on
                 RealGraph, a graph engine that addresses the problem of
                 performance degradation in processing real-world big
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ghosh:2021:IDS,
  author =       "Arthita Ghosh and Deven Bansod and Arpit Narechania
                 and Prashanth Dintyala and Su Timurturkan and Joy
                 Arulraj",
  title =        "Interactive demonstration of {SQLCheck}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2779--2782",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476343",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476343",
  abstract =     "We will demonstrate a prototype of sqlcheck, a
                 holistic toolchain for automatically finding and fixing
                 anti-patterns in database applications. The advent of
                 modern database-as-a-service platforms has made it easy
                 for developers to quickly create \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2021:CET,
  author =       "Yiming Lin and Pramod Khargonekar and Sharad Mehrotra
                 and Nalini Venkatasubramanian",
  title =        "{T-Cove}: an exposure tracing system based on cleaning
                 {Wi-Fi} events on organizational premises",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2783--2786",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476344",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476344",
  abstract =     "WiFi connectivity events, generated when a mobile
                 device connects to WiFi access points can serve as a
                 robust, passive, (almost) zero-cost indoor localization
                 technology. The challenge is the coarse level
                 localization it offers that limits its \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:DGE,
  author =       "Paul Y. Wang and Sainyam Galhotra and Romila Pradhan
                 and Babak Salimi",
  title =        "Demonstration of generating explanations for black-box
                 algorithms using {Lewis}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2787--2790",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476345",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476345",
  abstract =     "Explainable artificial intelligence (XAI) aims to
                 reduce the opacity of AI-based decision-making systems,
                 allowing humans to scrutinize and trust them. Unlike
                 prior work that attributes the responsibility for an
                 algorithm's decisions to its inputs as a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Castelo:2021:ADS,
  author =       "Sonia Castelo and R{\'e}mi Rampin and A{\'e}cio Santos
                 and Aline Bessa and Fernando Chirigati and Juliana
                 Freire",
  title =        "{Auctus}: a dataset search engine for data discovery
                 and augmentation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2791--2794",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476346",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476346",
  abstract =     "The large volumes of structured data currently
                 available, from Web tables to open-data portals and
                 enterprise data, open up new opportunities for progress
                 in answering many important scientific, societal, and
                 business questions. However, finding \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rehman:2021:DRS,
  author =       "Mohammed Suhail Rehman and Silu Huang and Aaron J.
                 Elmore",
  title =        "A demonstration of {RELIC}: a system for retrospective
                 lineage inference of data workflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2795--2798",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476347",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476347",
  abstract =     "The ad-hoc, heterogeneous process of modern data
                 science typically involves loading, cleaning, and
                 mutating dataset(s) into multiple versions recorded as
                 artifacts by various tools within a single data science
                 workflow. Lineage information, including \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2021:SSC,
  author =       "Zhihao Chen and Haizhen Zhuo and Quanqing Xu and
                 Xiaodong Qi and Chengyu Zhu and Zhao Zhang and Cheqing
                 Jin and Aoying Zhou and Ying Yan and Hui Zhang",
  title =        "{SChain}: a scalable consortium blockchain exploiting
                 intra- and inter-block concurrency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2799--2802",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476348",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476348",
  abstract =     "We demonstrate SChain, a consortium blockchain that
                 scales transaction processing to support large-scale
                 enterprise applications. The unique advantage of SChain
                 stems from the exploitation of both intra- and
                 inter-block concurrency. The intra-block \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Anastasiou:2021:EEP,
  author =       "Chrysovalantis Anastasiou and Constantinos Costa and
                 Panos K. Chrysanthis and Cyrus Shahabi",
  title =        "{EPICGen}: an experimental platform for indoor
                 congestion generation and forecasting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2803--2806",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476349",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476349",
  abstract =     "Effectively and accurately forecasting the congestion
                 in indoor spaces has become particularly important
                 during the pandemic in order to reduce the risk of
                 exposure to airborne viruses. However, there is a lack
                 of readily available indoor congestion \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Arnaout:2021:WKB,
  author =       "Hiba Arnaout and Simon Razniewski and Gerhard Weikum
                 and Jeff Z. Pan",
  title =        "{Wikinegata}: a knowledge base with interesting
                 negative statements",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2807--2810",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476350",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476350",
  abstract =     "Databases about general-world knowledge, so-called
                 knowledge bases (KBs), are important in applications
                 such as search and question answering. Traditionally,
                 although KBs use open world assumption, popular KBs
                 only store positive information, but \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2021:FEE,
  author =       "Jinwei Zhu and Kun Cheng and Jiayang Liu and Liang
                 Guo",
  title =        "Full encryption: an end to end encryption mechanism in
                 {GaussDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2811--2814",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476351",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476351",
  abstract =     "In this paper, we present a novel mechanism called
                 Full Encryption (FE) in GaussDB. FE-in-GaussDB provides
                 column-level encryption for sensitive data, and secures
                 the asset from any malicious cloud administrator or
                 information leakage attack. It ensures \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mandamadiotis:2021:DIA,
  author =       "Antonis Mandamadiotis and Stavroula Eleftherakis and
                 Apostolos Glenis and Dimitrios Skoutas and Yannis
                 Stavrakas and Georgia Koutrika",
  title =        "{DatAgent}: the imminent age of intelligent data
                 assistants",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2815--2818",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476352",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476352",
  abstract =     "In this demonstration, we present DatAgent, an
                 intelligent data assistant system that allows users to
                 ask queries in natural language, and can respond in
                 natural language as well. Moreover, the system actively
                 guides the user using different types of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rezig:2021:DDD,
  author =       "El Kindi Rezig and Anshul Bhandari and Anna Fariha and
                 Benjamin Price and Allan Vanterpool and Vijay Gadepally
                 and Michael Stonebraker",
  title =        "{DICE}: data discovery by example",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2819--2822",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476353",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476353",
  abstract =     "In order to conduct analytical tasks, data scientists
                 often need to find relevant data from an avalanche of
                 sources (e.g., data lakes, large organizational
                 databases). This effort is typically made in an ad hoc,
                 non-systematic manner, which makes it a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schuhknecht:2021:AAP,
  author =       "Felix Schuhknecht and Aaron Priesterroth and Justus
                 Henneberg and Reza Salkhordeh",
  title =        "{AnyOLAP}: analytical processing of arbitrary
                 data-intensive applications without {ETL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2823--2826",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476354",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476354",
  abstract =     "The volume of data that is processed and produced by
                 modern data-intensive applications is constantly
                 increasing. Of course, along with the volume, the
                 interest in analyzing and interpreting this data
                 increases as well. As a consequence, more and more
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jacob:2021:DEB,
  author =       "Vincent Jacob and Fei Song and Arnaud Stiegler and
                 Bijan Rad and Yanlei Diao and Nesime Tatbul",
  title =        "A demonstration of the {Exathlon} benchmarking
                 platform for explainable anomaly detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2827--2830",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476355",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476355",
  abstract =     "In this demo, we introduce Exathlon --- a new
                 benchmarking platform for explainable anomaly detection
                 over high-dimensional time series. We designed Exathlon
                 to support data scientists and researchers in
                 developing and evaluating learned models and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shaikhha:2021:IRH,
  author =       "Amir Shaikhha and Maximilian Schleich and Dan
                 Olteanu",
  title =        "An intermediate representation for hybrid database and
                 machine learning workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2831--2834",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476356",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476356",
  abstract =     "IFAQ is an intermediate representation and compilation
                 framework for hybrid database and machine learning
                 workloads expressible using iterative programs with
                 functional aggregate queries. We demonstrate IFAQ for
                 several OLAP queries, linear algebra \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pastor:2021:HDY,
  author =       "Eliana Pastor and Andrew Gavgavian and Elena Baralis
                 and Luca de Alfaro",
  title =        "How divergent is your data?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2835--2838",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476357",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476357",
  abstract =     "We present DivExplorer, a tool that enables users to
                 explore datasets and find subgroups of data for which a
                 classifier behaves in an anomalous manner. These
                 subgroups, denoted as divergent subgroups, may exhibit,
                 for example, higher-than-normal false \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Berro:2021:ERP,
  author =       "Auday Berro and Mohammad-Ali Yaghub Zade Fard and
                 Marcos Baez and Boualem Benatallah and Khalid
                 Benabdeslem",
  title =        "An extensible and reusable pipeline for automated
                 utterance paraphrases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2839--2842",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476358",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476358",
  abstract =     "In this demonstration paper we showcase an extensible
                 and reusable pipeline for automatic paraphrase
                 generation, i.e., reformulating sentences using
                 different words. Capturing the nuances of human
                 language is fundamental to the effectiveness of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Beedkar:2021:CGD,
  author =       "Kaustubh Beedkar and David Brekardin and Jorge-Arnulfo
                 Quian{\'e}-Ruiz and Volker Markl",
  title =        "Compliant geo-distributed data processing in action",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2843--2846",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476359",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476359",
  abstract =     "In this paper we present our work on compliant
                 geo-distributed data processing. Our work focuses on
                 the new dimension of dataflow constraints that regulate
                 the movement of data across geographical or
                 institutional borders. For example, European \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yadav:2021:QDV,
  author =       "Piyush Yadav and Dhaval Salwala and Felipe Arruda
                 Pontes and Praneet Dhingra and Edward Curry",
  title =        "Query-driven video event processing for the {Internet
                 of Multimedia Things}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2847--2850",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476360",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476360",
  abstract =     "Advances in Deep Neural Network (DNN) techniques have
                 revolutionized video analytics and unlocked the
                 potential for querying and mining video event patterns.
                 This paper details GNOSIS, an event processing platform
                 to perform near-real-time video event \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Koutroumanis:2021:DNU,
  author =       "Nikolaos Koutroumanis and Nikolaos Kousathanas and
                 Christos Doulkeridis and Akrivi Vlachou",
  title =        "A demonstration of {NoDA}: unified access to {NoSQL}
                 stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2851--2854",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476361",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476361",
  abstract =     "In this demo paper, we present a system prototype,
                 called NoDA, that unifies access to NoSQL stores, by
                 exposing a single interface to big data developers.
                 This hides the heterogeneity of NoSQL stores, in terms
                 of different query languages, non- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sen:2021:APP,
  author =       "Rathijit Sen and Abhishek Roy and Alekh Jindal and Rui
                 Fang and Jeff Zheng and Xiaolei Liu and Ruiping Li",
  title =        "{AutoExecutor}: predictive parallelism for {Spark SQL}
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2855--2858",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476362",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476362",
  abstract =     "Right-sizing resources for query execution is
                 important for cost-efficient performance, but
                 estimating how performance is affected by resource
                 allocations, upfront, before query execution is
                 difficult. We demonstrate AutoExecutor, a predictive
                 system \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:CBA,
  author =       "Jiaxiang Liu and Karl Knopf and Yiqing Tan and Bolin
                 Ding and Xi He",
  title =        "Catch a blowfish alive: a demonstration of
                 policy-aware differential privacy for interactive data
                 exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2859--2862",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476363",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476363",
  abstract =     "Policy-aware differential privacy (DP) frameworks such
                 as Blowfish privacy enable more accurate query answers
                 than standard DP. In this work, we build the first
                 policy-aware DP system for interactive data
                 exploration, BlowfishDB, that aims to (i) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ouellette:2021:RDL,
  author =       "Paul Ouellette and Aidan Sciortino and Fatemeh
                 Nargesian and Bahar Ghadiri Bashardoost and Erkang Zhu
                 and Ken Q. Pu and Ren{\'e}e J. Miller",
  title =        "{RONIN}: data lake exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2863--2866",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476364",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476364",
  abstract =     "Dataset discovery can be performed using search (with
                 a query or keywords) to find relevant data. However,
                 the result of this discovery can be overwhelming to
                 explore. Existing navigation techniques mostly focus on
                 linkage graphs that enable navigation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Boniol:2021:SAS,
  author =       "Paul Boniol and John Paparrizos and Themis Palpanas
                 and Michael J. Franklin",
  title =        "{SAND} in action: subsequence anomaly detection for
                 streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2867--2870",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476365",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476365",
  abstract =     "Subsequence anomaly detection in long data series is a
                 significant problem. While the demand for real-time
                 analytics and decision making increases, anomaly
                 detection methods have to operate over streams and
                 handle drifts in data distribution. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Koutras:2021:VAM,
  author =       "Christos Koutras and Kyriakos Psarakis and George
                 Siachamis and Andra Ionescu and Marios Fragkoulis and
                 Angela Bonifati and Asterios Katsifodimos",
  title =        "{Valentine} in action: matching tabular data at
                 scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2871--2874",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476366",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476366",
  abstract =     "Capturing relationships among heterogeneous datasets
                 in large data lakes --- traditionally termed schema
                 matching --- is one of the most challenging problems
                 that corporations and institutions face nowadays.
                 Discovering and integrating datasets heavily \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Guan:2021:GDE,
  author =       "Sheng Guan and Hanchao Ma and Sutanay Choudhury and
                 Yinghui Wu",
  title =        "{GEDet}: detecting erroneous nodes with a few
                 examples",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2875--2878",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476367",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476367",
  abstract =     "Detecting nodes with erroneous values in real-world
                 graphs remains challenging due to the lack of examples
                 and various error scenarios. We demonstrate GEDet, an
                 error detection engine that can detect erroneous nodes
                 in graphs with a few examples. The \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2021:GUE,
  author =       "Wenfei Fan and Tao He and Longbin Lai and Xue Li and
                 Yong Li and Zhao Li and Zhengping Qian and Chao Tian
                 and Lei Wang and Jingbo Xu and Youyang Yao and Qiang
                 Yin and Wenyuan Yu and Jingren Zhou and Diwen Zhu and
                 Rong Zhu",
  title =        "{GraphScope}: a unified engine for big graph
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2879--2892",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476369",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476369",
  abstract =     "GraphScope is a system and a set of language
                 extensions that enable a new programming interface for
                 large-scale distributed graph computing. It generalizes
                 previous graph processing frameworks (e.g., Pregel,
                 GraphX) and distributed graph databases (e.g.,
                 \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shang:2021:DSI,
  author =       "Zeyuan Shang and Emanuel Zgraggen and Benedetto
                 Buratti and Philipp Eichmann and Navid Karimeddiny and
                 Charlie Meyer and Wesley Runnels and Tim Kraska",
  title =        "{Davos}: a system for interactive data-driven decision
                 making",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2893--2905",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476370",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476370",
  abstract =     "Recently, a new horizon in data analytics,
                 prescriptive analytics, is becoming more and more
                 important to make data-driven decisions. As opposed to
                 the progress of democratizing data acquisition and
                 access, making data-driven decisions remains a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Qin:2021:MEU,
  author =       "An Qin and Mengbai Xiao and Yongwei Wu and Xinjie
                 Huang and Xiaodong Zhang",
  title =        "{Mixer}: efficiently understanding and retrieving
                 visual content at web-scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2906--2917",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476371",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476371",
  abstract =     "Visual contents, including images and videos, are
                 dominant on the Internet today. The conventional search
                 engine is mainly designed for textual documents, which
                 must be extended to process and manage increasingly
                 high volumes of visual data objects. In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Justo:2021:TPF,
  author =       "David Justo and Shaoqing Yi and Lukas Stadler and
                 Nadia Polikarpova and Arun Kumar",
  title =        "Towards a polyglot framework for factorized {ML}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2918--2931",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476372",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476372",
  abstract =     "Optimizing machine learning (ML) workloads on
                 structured data is a key concern for data platforms.
                 One class of optimizations called ``factorized ML''
                 helps reduce ML runtimes over multi-table datasets by
                 pushing ML computations down through joins, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dayan:2021:EML,
  author =       "Niv Dayan and Moshe Twitto and Yuval Rochman and Uri
                 Beitler and Itai {Ben Zion} and Edward Bortnikov and
                 Shmuel Dashevsky and Ofer Frishman and Evgeni Ginzburg
                 and Igal Maly and Avraham (Poza) Meir and Mark Mokryn
                 and Iddo Naiss and Noam Rabinovich",
  title =        "The end of {Moore}'s law and the rise of the data
                 processor",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2932--2944",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476373",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476373",
  abstract =     "With the end of Moore's Law, database architects are
                 turning to hardware accelerators to offload
                 computationally intensive tasks from the CPU. In this
                 paper, we show that accelerators can facilitate far
                 more than just computation: they enable algorithms and
                 data structures that lavishly expand computation in
                 order to optimize for disparate cost metrics. We
                 introduce the Pliops Extreme Data Processor (XDP), a
                 novel storage engine implemented from the ground up
                 using customized hardware. At its core, XDP consists of
                 an accelerated hash table to index the data in storage
                 using less memory and fewer storage accesses for
                 queries than the best alternative. XDP also employs an
                 accelerated compressor, a capacitor, and a lock-free
                 RAID sub-system to minimize storage space and recovery
                 time while minimizing performance penalties. As a
                 result, XDP overcomes cost contentions that have so far
                 been inescapable.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Murray:2021:TDM,
  author =       "Derek G. Murray and Jir{\'\i} Simsa and Ana Klimovic
                 and Ihor Indyk",
  title =        "\pkg{tf.data}: a machine learning data processing
                 framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2945--2958",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476374",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476374",
  abstract =     "Training machine learning models requires feeding
                 input data for models to ingest. Input pipelines for
                 machine learning jobs are often challenging to
                 implement efficiently as they require reading large
                 volumes of data, applying complex transformations,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Eltabakh:2021:BBA,
  author =       "Mohamed Eltabakh and Anantha Subramanian and Awny
                 Al-Omari and Mohammed Al-Kateb and Sanjay Nair and
                 Mahbub Hasan and Wellington Cabrera and Charles Zhang
                 and Amit Kishore and Snigdha Prasad",
  title =        "Not black-box anymore!: enabling analytics-aware
                 optimizations in {Teradata Vantage}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2959--2971",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476375",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476375",
  abstract =     "Teradata Vantage is a platform for integrating a broad
                 range of analytical functions and capabilities with the
                 Teradata's SQL engine. One of the main challenges in
                 optimizing the execution of these analytical functions
                 is that many of them are not only \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2021:FAE,
  author =       "Yingda Chen and Jiamang Wang and Yifeng Lu and Ying
                 Han and Zhiqiang Lv and Xuebin Min and Hua Cai and Wei
                 Zhang and Haochuan Fan and Chao Li and Tao Guan and Wei
                 Lin and Yangqing Jia and Jingren Zhou",
  title =        "{Fangorn}: adaptive execution framework for
                 heterogeneous workloads on shared clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2972--2985",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476376",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476376",
  abstract =     "Pervasive needs for data explorations at all scales
                 have populated modern distributed platforms with
                 workloads of different characteristics. The growing
                 complexities and diversities have thereafter imposed
                 distinct challenges to execute them on shared
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Agiwal:2021:NPS,
  author =       "Ankur Agiwal and Kevin Lai and Gokul Nath Babu
                 Manoharan and Indrajit Roy and Jagan Sankaranarayanan
                 and Hao Zhang and Tao Zou and Min Chen and Zongchang
                 (Jim) Chen and Ming Dai and Thanh Do and Haoyu Gao and
                 Haoyan Geng and Raman Grover and Bo Huang and Yanlai
                 Huang and Zhi (Adam) Li and Jianyi Liang and Tao Lin
                 and Li Liu and Yao Liu and Xi Mao and Yalan (Maya) Meng
                 and Prashant Mishra and Jay Patel and Rajesh S. R. and
                 Vijayshankar Raman and Sourashis Roy and Mayank Singh
                 Shishodia and Tianhang Sun and Ye (Justin) Tang and
                 Junichi Tatemura and Sagar Trehan and Ramkumar Vadali
                 and Prasanna Venkatasubramanian and Gensheng Zhang and
                 Kefei Zhang and Yupu Zhang and Zeleng Zhuang and Goetz
                 Graefe and Divyakant Agrawal and Jeff Naughton and
                 Sujata Kosalge and Hakan Hac{\i}g{\"u}m{\"u}{\c{s}}",
  title =        "{Napa}: powering scalable data warehousing with robust
                 query performance at {Google}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2986--2997",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476377",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476377",
  abstract =     "Google services continuously generate vast amounts of
                 application data. This data provides valuable insights
                 to business users. We need to store and serve these
                 planet-scale data sets under the extremely demanding
                 requirements of scalability, sub-second \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2021:ABR,
  author =       "Rubao Lee and Minghong Zhou and Chi Li and Shenggang
                 Hu and Jianping Teng and Dongyang Li and Xiaodong
                 Zhang",
  title =        "The art of balance: a {RateupDBTM} experience of
                 building a {CPU\slash GPU} hybrid database product",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "2999--3013",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476378",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476378",
  abstract =     "GPU-accelerated database systems have been studied for
                 more than 10 years, ranging from prototyping
                 development to industry products serving in multiple
                 domains of data applications. Existing GPU database
                 research solutions are often focused on specific
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cheng:2021:RTL,
  author =       "Audrey Cheng and Xiao Shi and Lu Pan and Anthony
                 Simpson and Neil Wheaton and Shilpa Lawande and Nathan
                 Bronson and Peter Bailis and Natacha Crooks and Ion
                 Stoica",
  title =        "{RAMP-TAO}: layering atomic transactions on
                 {Facebook}'s online {TAO} data store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3014--3027",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476379",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476379",
  abstract =     "Facebook's graph store TAO, like many other
                 distributed data stores, traditionally prioritizes
                 availability, efficiency, and scalability over strong
                 consistency or isolation guarantees to serve its large,
                 read-dominant workloads. As product developers
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:OAD,
  author =       "Guoliang Li and Xuanhe Zhou and Ji Sun and Xiang Yu
                 and Yue Han and Lianyuan Jin and Wenbo Li and Tianqing
                 Wang and Shifu Li",
  title =        "{openGauss}: an autonomous database system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3028--3042",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476380",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476380",
  abstract =     "Although learning-based database optimization
                 techniques have been studied from academia in recent
                 years, they have not been widely deployed in commercial
                 database systems. In this work, we build an autonomous
                 database framework and integrate our \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Potharaju:2021:HIS,
  author =       "Rahul Potharaju and Terry Kim and Eunjin Song and
                 Wentao Wu and Lev Novik and Apoorve Dave and Andrew
                 Fogarty and Pouria Pirzadeh and Vidip Acharya and
                 Gurleen Dhody and Jiying Li and Sinduja Ramanujam and
                 Nicolas Bruno and C{\'e}sar A. Galindo-Legaria and
                 Vivek Narasayya and Surajit Chaudhuri and Anil K. Nori
                 and Tomas Talius and Raghu Ramakrishnan",
  title =        "{Hyperspace}: the indexing subsystem of {Azure
                 Synapse}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3043--3055",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476382",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476382",
  abstract =     "Microsoft recently introduced Azure Synapse Analytics,
                 which offers an integrated experience across data
                 ingestion, storage, and querying in Apache Spark and
                 T-SQL over data in the lake, including files and
                 warehouse tables. In this paper, we present \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zheng:2021:SVB,
  author =       "Bolong Zheng and Lei Bi and Juan Cao and Hua Chai and
                 Jun Fang and Lu Chen and Yunjun Gao and Xiaofang Zhou
                 and Christian S. Jensen",
  title =        "{SpeakNav}: voice-based route description language
                 understanding for template-driven path search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3056--3068",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476383",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476383",
  abstract =     "Many navigation applications take natural language
                 speech as input, which avoids users typing in words and
                 thus improves traffic safety. However, navigation
                 applications often fail to understand a user's
                 free-form description of a route. In addition,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gomes:2021:RML,
  author =       "Ana Sofia Gomes and Jo{\~a}o Oliveirinha and Pedro
                 Cardoso and Pedro Bizarro",
  title =        "{Railgun}: managing large streaming windows under
                 {MAD} requirements",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3069--3082",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476384",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476384",
  abstract =     "Some mission critical systems, e.g., fraud detection,
                 require accurate, real-time metrics over long time
                 sliding windows on applications that demand high
                 throughput and low latencies. As these applications
                 need to run ``forever'' and cope with large, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Edara:2021:BMW,
  author =       "Pavan Edara and Mosha Pasumansky",
  title =        "Big metadata: when metadata is big data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3083--3095",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476385",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476385",
  abstract =     "The rapid emergence of cloud data warehouses like
                 Google BigQuery has redefined the landscape of data
                 analytics. With the growth of data volumes, such
                 systems need to scale to hundreds of EiB of data in the
                 near future. This growth is accompanied by an
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Stoddard:2021:TRF,
  author =       "Josh Stoddard and Adam Mustafa and Naveen Goela",
  title =        "{Tanium Reveal}: a federated search engine for
                 querying unstructured file data on large enterprise
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3096--3109",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476386",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476386",
  abstract =     "Tanium Reveal is a federated search engine deployed on
                 large-scale enterprise networks that is capable of
                 executing data queries across billions of private data
                 files within 60 seconds. Data resides at the edge of
                 networks, potentially distributed on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gencer:2021:HJL,
  author =       "Can Gencer and Marko Topolnik and Viliam {\v{D}}urina
                 and Emin Demirci and Ensar B. Kahveci and Ali
                 G{\"u}rb{\"u}z and Ond{\v{r}}ej Luk{\'a}{\v{s}} and
                 J{\'o}zsef Bart{\'o}k and Grzegorz Gierlach and
                 Franti{\v{s}}ek Hartman and Ufuk Y{\i}lmaz and Mehmet
                 Do{\u{g}}an and Mohamed Mandouh and Marios Fragkoulis
                 and Asterios Katsifodimos",
  title =        "{Hazelcast Jet}: low-latency stream processing at the
                 99.99-th percentile",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3110--3121",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476387",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476387",
  abstract =     "Jet is an open source, high performance, distributed
                 stream processor built at Hazelcast during the last
                 five years. Jet was engineered with millisecond latency
                 on the 99.99th percentile as its primary design goal.
                 Originally Jet's purpose was to be an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Roy:2021:SWO,
  author =       "Abhishek Roy and Alekh Jindal and Priyanka Gomatam and
                 Xiating Ouyang and Ashit Gosalia and Nishkam Ravi and
                 Swinky Mann and Prakhar Jain",
  title =        "{SparkCruise}: workload optimization in managed {Spark}
                 clusters at {Microsoft}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3122--3134",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476388",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476388",
  abstract =     "Today cloud companies offer fully managed Spark
                 services. This has made it easy to onboard new
                 customers but has also increased the volume of users
                 and their workload sizes. However, both cloud providers
                 and users lack the tools and time to optimize
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Akidau:2021:WSP,
  author =       "Tyler Akidau and Edmon Begoli and Slava Chernyak and
                 Fabian Hueske and Kathryn Knight and Kenneth Knowles
                 and Daniel Mills and Dan Sotolongo",
  title =        "Watermarks in stream processing systems: semantics and
                 comparative analysis of {Apache Flink} and {Google
                 Cloud Dataflow}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3135--3147",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476389",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476389",
  abstract =     "Streaming data processing is an exercise in taming
                 disorder: from oftentimes huge torrents of information,
                 we hope to extract powerful and timely analyses. But
                 when dealing with streaming data, the unbounded and
                 temporally disordered nature of real-. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Power:2021:CBD,
  author =       "Conor Power and Hiren Patel and Alekh Jindal and Jyoti
                 Leeka and Bob Jenkins and Michael Rys and Ed Triou and
                 Dexin Zhu and Lucky Katahanas and Chakrapani Bhat
                 Talapady and Joshua Rowe and Fan Zhang and Rich Draves
                 and Marc Friedman and Ivan Santa Maria Filho and Amrish
                 Kumar",
  title =        "The {Cosmos} big data platform at {Microsoft}: over a
                 decade of progress and a decade to look forward",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3148--3161",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476390",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476390",
  abstract =     "The twenty-first century has been dominated by the
                 need for large scale data processing, marking the birth
                 of big data platforms such as Cosmos. This paper
                 describes the evolution of the exabyte-scale Cosmos big
                 data platform at Microsoft; our journey \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pandis:2021:EAR,
  author =       "Ippokratis Pandis",
  title =        "The evolution of {Amazon Redshift}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3162--3174",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476391",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476391",
  abstract =     "In 2013, Amazon Web Services revolutionized the data
                 warehousing industry by launching Amazon Redshift [7],
                 the first fully managed, petabyte-scale
                 enterprise-grade cloud data warehouse. Amazon Redshift
                 made it simple and cost-effective to efficiently
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Razniewski:2021:LMK,
  author =       "Simon Razniewski and Hiba Arnaout and Shrestha Ghosh
                 and Fabian Suchanek",
  title =        "On the limits of machine knowledge: completeness,
                 recall and negation in web-scale knowledge bases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3175--3177",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476401",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476401",
  abstract =     "General-purpose knowledge bases (KBs) are an important
                 component of several data-driven applications.
                 Pragmatically constructed from available web sources,
                 these KBs are far from complete, which poses a set of
                 challenges in curation as well as \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Orr:2021:MMP,
  author =       "Laurel Orr and Atindriyo Sanyal and Xiao Ling and
                 Karan Goel and Megan Leszczynski",
  title =        "Managing {ML} pipelines: feature stores and the coming
                 wave of embedding ecosystems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3178--3181",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476402",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476402",
  abstract =     "The industrial machine learning pipeline requires
                 iterating on model features, training and deploying
                 models, and monitoring deployed models at scale.
                 Feature stores were developed to manage and standardize
                 the engineer's workflow in this end-to-end \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:DAM,
  author =       "Yuliang Li and Xiaolan Wang and Zhengjie Miao and
                 Wang-Chiew Tan",
  title =        "Data augmentation for {ML}-driven data preparation and
                 integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3182--3185",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476403",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476403",
  abstract =     "In recent years, we have witnessed the development of
                 novel data augmentation (DA) techniques for creating
                 additional training data needed by machine learning
                 based solutions. In this tutorial, we will provide a
                 comprehensive overview of techniques \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zalipynis:2021:ADP,
  author =       "Ramon Antonio Rodriges Zalipynis",
  title =        "Array {DBMS}: past, present, and (near) future",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3186--3189",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476404",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476404",
  abstract =     "Array DBMSs strive to be the best systems for
                 managing, processing, and even visualizing big N -d
                 arrays. The last decade blossomed with R\&D in array
                 DBMS, making it a young and fast-evolving area. We
                 present the first comprehensive tutorial on array
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:MLD,
  author =       "Guoliang Li and Xuanhe Zhou and Lei Cao",
  title =        "Machine learning for databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3190--3193",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476405",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476405",
  abstract =     "Machine learning techniques have been proposed to
                 optimize the databases. For example, traditional
                 empirical database optimization techniques (e.g., cost
                 estimation, join order selection, knob tuning, index
                 and view advisor) cannot meet the high-. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kargar:2021:ELN,
  author =       "Saeed Kargar and Faisal Nawab",
  title =        "Extending the lifetime of {NVM}: challenges and
                 opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3194--3197",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476406",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476406",
  abstract =     "Recently, Non-Volatile Memory (NVM) technology has
                 revolutionized the landscape of memory systems. With
                 many advantages, such as non volatility and near zero
                 standby power consumption, these byte-addressable
                 memory technologies are taking the place of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Echihabi:2021:NTH,
  author =       "Karima Echihabi and Kostas Zoumpatianos and Themis
                 Palpanas",
  title =        "New trends in high-{D} vector similarity search:
                 {AI}-driven, progressive, and distributed",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3198--3201",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476407",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476407",
  abstract =     "Similarity search is a core operation of many critical
                 applications, involving massive collections of
                 high-dimensional (high-d) objects. Objects can be data
                 series, text, multimedia, graphs, database tables or
                 deep network embeddings. In this tutorial, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jindal:2021:MLC,
  author =       "Alekh Jindal and Matteo Interlandi",
  title =        "Machine learning for cloud data systems: the progress
                 so far and the path forward",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3202--3205",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476408",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476408",
  abstract =     "The goal of this tutorial is to educate the audience
                 about the state of the art in ML for cloud data
                 systems, both in research and in practice. The tutorial
                 is divided in two parts: the progress, and the path
                 forward. Part I covers the recent successes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Davidson:2021:JCT,
  author =       "Susan B. Davidson",
  title =        "It's not just cookies and tea",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3206--3206",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476409",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476409",
  abstract =     "Three of the major research themes over my career have
                 been concurrency, integration and provenance. In this
                 talk, I will explain why these themes are not only
                 important in database research, but how they have
                 played a role in my personal success. I \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Neumann:2021:ECQ,
  author =       "Thomas Neumann",
  title =        "Evolution of a compiling query engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3207--3210",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476410",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476410",
  abstract =     "In 2011 we showed how to use dynamic code generation
                 to process queries in a data-centric manner. This
                 execution model can produce compact and efficient code
                 and was successfully used by both our own systems and
                 systems of other groups. As the systems \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pavlo:2021:MYD,
  author =       "Andrew Pavlo and Matthew Butrovich and Lin Ma and
                 Prashanth Menon and Wan Shen Lim and Dana {Van Aken}
                 and William Zhang",
  title =        "Make your database system dream of electric sheep:
                 towards self-driving operation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3211--3221",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476411",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476411",
  abstract =     "Database management systems (DBMSs) are notoriously
                 difficult to deploy and administer. Self-driving DBMSs
                 seek to remove these impediments by managing themselves
                 automatically. Despite decades of DBMS auto-tuning
                 research, a truly autonomous, self-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kraska:2021:TIO,
  author =       "Tim Kraska",
  title =        "Towards instance-optimized data systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3222--3232",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476392",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476392",
  abstract =     "In recent years, we have seen increased interest in
                 applying machine learning to system problems. For
                 example, there has been work on applying machine
                 learning to improve query optimization, indexing,
                 storage layouts, scheduling, log-structured merge
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Weikum:2021:KGD,
  author =       "Gerhard Weikum",
  title =        "Knowledge graphs 2021: a data odyssey",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3233--3238",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476393",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476393",
  abstract =     "Providing machines with comprehensive knowledge of the
                 world's entities and their relationships has been a
                 long-standing vision and challenge for AI. Over the
                 last 15 years, huge knowledge bases, also known as
                 knowledge graphs, have been automatically \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ives:2021:FDB,
  author =       "Zachary G. Ives and Rachel Pottinger and Arun Kumar
                 and Johannes Gehrke and Jana Giceva",
  title =        "The future of data(base) education: is the ``cow
                 book'' dead?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3239--3239",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476394",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476394",
  abstract =     "This panel encourages a debate over the future of
                 database education and its relationship to Data
                 Science: Are Computer Science (CS) and Data Science
                 (DS) different disciplines about to split, and how does
                 that affect how we teach our field? Is there a ``data''
                 course that belongs in CS that all of our students
                 should take? Who is the traditional database course,
                 e.g. based on the ``cow book'', relevant to? What
                 traditional topics should we not be teaching in our
                 core data course(s) and which ones should be added?
                 What do we teach the student who has one elective for
                 data science? How does our community position itself
                 for leadership in CS given the popularity of DS?",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Remis:2021:UVI,
  author =       "Luis Remis and Chaunt{\'e} W. Lacewell",
  title =        "Using {VDMS} to index and search {100M} images",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "12",
  pages =        "3240--3252",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3476311.3476381",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:41:16 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3476311.3476381",
  abstract =     "Data scientists spend most of their time dealing with
                 data preparation, rather than doing what they know
                 best: build machine learning models and algorithms to
                 solve previously unsolvable problems. In this paper, we
                 describe the Visual Data Management \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:TEF,
  author =       "Jian Liu and Kefei Wang and Feng Chen",
  title =        "{TSCache}: an efficient flash-based caching scheme for
                 time-series data workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3253--3266",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484225",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484225",
  abstract =     "Time-series databases are becoming an indispensable
                 component in today's data centers. In order to manage
                 the rapidly growing time-series data, we need an
                 effective and efficient system solution to handle the
                 huge traffic of time-series data queries. A \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:MRL,
  author =       "Huayi Wang and Jingfan Meng and Long Gong and Jun Xu
                 and Mitsunori Ogihara",
  title =        "{MP-RW-LSH}: an efficient multi-probe {LSH} solution
                 to {ANNS-L$_1$}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3267--3280",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484226",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484226",
  abstract =     "Approximate Nearest Neighbor Search (ANNS) is a
                 fundamental algorithmic problem, with numerous
                 applications in many areas of computer science.
                 Locality-Sensitive Hashing (LSH) is one of the most
                 popular solution approaches for ANNS. A common
                 shortcoming of many LSH schemes is that since they
                 probe only a single bucket in a hash table, they need
                 to use a large number of hash tables to achieve a high
                 query accuracy. For ANNS-L2, a multi-probe scheme was
                 proposed to overcome this drawback by strategically
                 probing multiple buckets in a hash table. In this work,
                 we propose MP-RW-LSH, the first and so far only
                 multi-probe LSH solution to ANNS in L1 distance, and
                 show that it achieves a better tradeoff between
                 scalability and query efficiency than all existing
                 LSH-based solutions. We also explain why a
                 state-of-the-art ANNS-L1 solution called Cauchy
                 projection LSH (CP-LSH) is fundamentally not suitable
                 for multi-probe extension. Finally, as a use case, we
                 construct, using MP-RW-LSH as the underlying
                 ``ANNS-L$_1$ engine'', a new ANNS-E (E for edit
                 distance) solution that beats the state of the art.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mailis:2021:VSK,
  author =       "Theofilos Mailis and Yannis Kotidis and Stamatis
                 Christoforidis and Evgeny Kharlamov and Yannis
                 Ioannidis",
  title =        "View selection over knowledge graphs in triple
                 stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3281--3294",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484227",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484227",
  abstract =     "Knowledge Graphs (KGs) are collections of
                 interconnected and annotated entities that have become
                 powerful assets for data integration, search
                 enhancement, and other industrial applications.
                 Knowledge Graphs such as DBPEDIA may contain billions of
                 triple \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:FHO,
  author =       "Dongjie Li and Siyi Lv and Yanyu Huang and Yijing Liu
                 and Tong Li and Zheli Liu and Liang Guo",
  title =        "Frequency-hiding order-preserving encryption with
                 small client storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3295--3307",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484228",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484228",
  abstract =     "The range query on encrypted databases is usually
                 implemented using the order-preserving encryption (OPE)
                 technique which preserves the order of plaintexts.
                 Since the frequency leakage of plaintexts makes OPE
                 vulnerable to frequency-analyzing attacks, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Koutsoukos:2021:MMR,
  author =       "Dimitrios Koutsoukos and Ingo M{\"u}ller and Renato
                 Marroqu{\'\i}n and Ana Klimovic and Gustavo Alonso",
  title =        "{Modularis}: modular relational analytics over
                 heterogeneous distributed platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3308--3321",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484229",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484229",
  abstract =     "The enormous quantity of data produced every day
                 together with advances in data analytics has led to a
                 proliferation of data management and analysis systems.
                 Typically, these systems are built around highly
                 specialized monolithic operators optimized for
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lou:2021:TTA,
  author =       "Yunkai Lou and Chaokun Wang and Tiankai Gu and Hao
                 Feng and Jun Chen and Jeffrey Xu Yu",
  title =        "Time-topology analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3322--3334",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484230",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484230",
  abstract =     "Many real-world networks have been evolving, and are
                 finely modeled as temporal graphs from the viewpoint of
                 the graph theory. A temporal graph is informative, and
                 always contains two types of information, i.e., the
                 temporal information and topological \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bernau:2021:QIC,
  author =       "Daniel Bernau and G{\"u}nther Eibl and Philip W.
                 Grassal and Hannah Keller and Florian Kerschbaum",
  title =        "Quantifying identifiability to choose and audit
                 $\epsilon$ in differentially private deep learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3335--3347",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484231",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484231",
  abstract =     "Differential privacy allows bounding the influence
                 that training data records have on a machine learning
                 model. To use differential privacy in machine learning,
                 data scientists must choose privacy parameters
                 ($\epsilon$, $\delta$). Choosing meaningful privacy
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Laigner:2021:DMM,
  author =       "Rodrigo Laigner and Yongluan Zhou and Marcos Antonio
                 Vaz Salles and Yijian Liu and Marcos Kalinowski",
  title =        "Data management in microservices: state of the
                 practice, challenges, and research directions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3348--3361",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484232",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484232",
  abstract =     "Microservices have become a popular architectural
                 style for data-driven applications, given their ability
                 to functionally decompose an application into small and
                 autonomous services to achieve scalability, strong
                 isolation, and specialization of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ammerlaan:2021:PDM,
  author =       "Remmelt Ammerlaan and Gilbert Antonius and Marc
                 Friedman and H. M. Sajjad Hossain and Alekh Jindal and
                 Peter Orenberg and Hiren Patel and Shi Qiao and Vijay
                 Ramani and Lucas Rosenblatt and Abhishek Roy and Irene
                 Shaffer and Soundarajan Srinivasan and Markus Weimer",
  title =        "{PerfGuard}: deploying {ML}-for-systems without
                 performance regressions, almost!",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3362--3375",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484233",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484233",
  abstract =     "Modern data processing systems require optimization at
                 massive scale, and using machine learning to optimize
                 these systems (ML-for-systems) has shown promising
                 results. Unfortunately, ML-for-systems is subject to
                 over generalizations that do not capture \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ding:2021:DDS,
  author =       "Bailu Ding and Surajit Chaudhuri and Johannes Gehrke
                 and Vivek Narasayya",
  title =        "{DSB}: a decision support benchmark for
                 workload-driven and traditional database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3376--3388",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484234",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484234",
  abstract =     "We describe a new benchmark, DSB, for evaluating both
                 workload-driven and traditional database systems on
                 modern decision support workloads. DSB is adapted from
                 the widely-used industrial-standard TPC-DS benchmark.
                 It enhances the TPC-DS benchmark with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hernandez:2021:CHP,
  author =       "Daniel Hern{\'a}ndez and Luis Gal{\'a}rraga and Katja
                 Hose",
  title =        "Computing how-provenance for {SPARQL} queries via
                 query rewriting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3389--3401",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484235",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484235",
  abstract =     "Over the past few years, we have witnessed the
                 emergence of large knowledge graphs built by extracting
                 and combining information from multiple sources. This
                 has propelled many advances in query processing over
                 knowledge graphs, however the aspect of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:UUD,
  author =       "Junxiong Wang and Immanuel Trummer and Debabrota
                 Basu",
  title =        "{UDO}: universal database optimization using
                 reinforcement learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3402--3414",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484236",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484236",
  abstract =     "UDO is a versatile tool for offline tuning of database
                 systems for specific workloads. UDO can consider a
                 variety of tuning choices, reaching from picking
                 transaction code variants over index selections up to
                 database system parameter tuning. UDO uses \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Feldmann:2021:ITA,
  author =       "Anja Feldmann",
  title =        "{Internet} traffic analysis at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3415--3415",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484237",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484237",
  abstract =     "In this talk, I will use multiple internet measurement
                 studies as examples to outline the challenges that we
                 face when performing internet-scale traffic analysis,
                 including implications of the COVID-19 pandemic on
                 internet traffic as well as detecting \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Koutra:2021:PSG,
  author =       "Danai Koutra",
  title =        "The power of summarization in graph mining and
                 learning: smaller data, faster methods, more
                 interpretability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3416--3416",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484238",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484238",
  abstract =     "Our ability to generate, collect, and archive data
                 related to everyday activities, such as interacting on
                 social media, browsing the web, and monitoring
                 well-being, is rapidly increasing. Getting the most
                 benefit from this large-scale data requires \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shah:2021:SPL,
  author =       "Nigam Shah",
  title =        "Summarizing patients like mine via an on-demand
                 consultation service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3417--3417",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484242",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484242",
  abstract =     "Using evidence derived from previously collected
                 medical records to guide patient care has been a
                 long-standing vision of clinicians and informaticians,
                 and one with the potential to transform medical
                 practice. We offered an on-demand consultation
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vanschoren:2021:TSO,
  author =       "Joaquin Vanschoren",
  title =        "Towards scalable online machine learning
                 collaborations with {OpenML}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3418--3418",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484239",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484239",
  abstract =     "Is massively collaborative machine learning possible?
                 Can we share and organize our collective knowledge of
                 machine learning to solve ever more challenging
                 problems? In a way, yes: as a community, we are already
                 very successful at developing high-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vartak:2021:MMI,
  author =       "Manasi Vartak",
  title =        "From {ML} models to intelligent applications: the rise
                 of {MLOps}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3419--3419",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484240",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484240",
  abstract =     "The last 5+ years in ML have focused on building the
                 best models, hyperparameter optimization, parallel
                 training, massive neural networks, etc. Now that the
                 building of models has become easy, models are being
                 integrated into every piece of software and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zaharia:2021:DPF,
  author =       "Matei Zaharia",
  title =        "Designing production-friendly machine learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "14",
  number =       "13",
  pages =        "3420--3420",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3484224.3484241",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Oct 29 16:38:15 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3484224.3484241",
  abstract =     "Building production ML applications is difficult
                 because of their resource cost and complex failure
                 modes. I will discuss these challenges from two
                 perspectives: the Stanford DAWN Lab and experience with
                 large-scale commercial ML users at Databricks. I
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2021:ASA,
  author =       "Kang Zhao and Liuyihan Song and Yingya Zhang and Pan
                 Pan and Yinghui Xu and Rong Jin",
  title =        "{ANN} softmax: acceleration of extreme classification
                 training",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "1--10",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485451",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485451",
  abstract =     "Thanks to the popularity of GPU and the growth of its
                 computational power, more and more deep learning tasks,
                 such as face recognition, image retrieval and word
                 embedding, can take advantage of extreme classification
                 to improve accuracy. However, it \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yu:2021:WTD,
  author =       "Gyeong-In Yu and Saeed Amizadeh and Sehoon Kim and
                 Artidoro Pagnoni and Ce Zhang and Byung-Gon Chun and
                 Markus Weimer and Matteo Interlandi",
  title =        "{WindTunnel}: towards differentiable {ML} pipelines
                 beyond a single model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "11--20",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485452",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485452",
  abstract =     "While deep neural networks (DNNs) have shown to be
                 successful in several domains like computer vision,
                 non-DNN models such as linear models and gradient
                 boosting trees are still considered state-of-the-art
                 over tabular data. When using these models, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Skiadopoulos:2021:DDO,
  author =       "Athinagoras Skiadopoulos and Qian Li and Peter Kraft
                 and Kostis Kaffes and Daniel Hong and Shana Mathew and
                 David Bestor and Michael Cafarella and Vijay Gadepally
                 and Goetz Graefe and Jeremy Kepner and Christos
                 Kozyrakis and Tim Kraska and Michael Stonebraker and
                 Lalith Suresh and Matei Zaharia",
  title =        "{DBOS}: a {DBMS}-oriented operating system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "21--30",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485454",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485454",
  abstract =     "This paper lays out the rationale for building a
                 completely new operating system (OS) stack. Rather than
                 build on a single node OS together with separate
                 cluster schedulers, distributed filesystems, and
                 network managers, we argue that a distributed
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jain:2021:DIA,
  author =       "Arjit Jain and Sunita Sarawagi and Prithviraj Sen",
  title =        "Deep indexed active learning for matching
                 heterogeneous entity representations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "31--45",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485455",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485455",
  abstract =     "Given two large lists of records, the task in entity
                 resolution (ER) is to find the pairs from the Cartesian
                 product of the lists that correspond to the same real
                 world entity. Typically, passive learning methods on
                 such tasks require large amounts of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2021:LQR,
  author =       "Xuanhe Zhou and Guoliang Li and Chengliang Chai and
                 Jianhua Feng",
  title =        "A learned query rewrite system using {Monte Carlo}
                 tree search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "46--58",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485456",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485456",
  abstract =     "Query rewrite transforms a SQL query into an
                 equivalent one but with higher performance. However,
                 SQL rewrite is an NP-hard problem, and existing
                 approaches adopt heuristics to rewrite the queries.
                 These heuristics have two main limitations. First, the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2021:DCP,
  author =       "Yin Lin and Brit Youngmann and Yuval Moskovitch and H.
                 V. Jagadish and Tova Milo",
  title =        "On detecting cherry-picked generalizations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "59--71",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485457",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485457",
  abstract =     "Generalizing from detailed data to statements in a
                 broader context is often critical for users to make
                 sense of large data sets. Correspondingly, poorly
                 constructed generalizations might convey misleading
                 information even if the statements are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2021:FNF,
  author =       "Jiayi Wang and Chengliang Chai and Jiabin Liu and
                 Guoliang Li",
  title =        "{FACE}: a normalizing flow based cardinality
                 estimator",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "72--84",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485458",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485458",
  abstract =     "Cardinality estimation is one of the most important
                 problems in query optimization. Recently, machine
                 learning based techniques have been proposed to
                 effectively estimate cardinality, which can be broadly
                 classified into query-driven and data-driven \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2021:LCE,
  author =       "Ji Sun and Jintao Zhang and Zhaoyan Sun and Guoliang
                 Li and Nan Tang",
  title =        "Learned cardinality estimation: a design space
                 exploration and a comparative evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "85--97",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485459",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485459",
  abstract =     "Cardinality estimation is core to the query optimizers
                 of DBMSs. Non-learned methods, especially based on
                 histograms and samplings, have been widely used in
                 commercial and open-source DBMSs. Nevertheless,
                 histograms and samplings can only be used to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{He:2021:DAD,
  author =       "Dong He and Maureen Daum and Walter Cai and Magdalena
                 Balazinska",
  title =        "{DeepEverest}: accelerating declarative top-{$K$}
                 queries for deep neural network interpretation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "98--111",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485460",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485460",
  abstract =     "We design, implement, and evaluate DeepEverest, a
                 system for the efficient execution of interpretation by
                 example queries over the activation values of a deep
                 neural network. DeepEverest consists of an efficient
                 indexing technique and a query execution \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chatterjee:2021:CCC,
  author =       "Subarna Chatterjee and Meena Jagadeesan and Wilson Qin
                 and Stratos Idreos",
  title =        "{Cosine}: a cloud-cost optimized self-designing
                 key--value storage engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "112--126",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485461",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485461",
  abstract =     "We present a self-designing key-value storage engine,
                 Cosine, which can always take the shape of the close to
                 ``perfect'' engine architecture given an input
                 workload, a cloud budget, a target performance, and
                 required cloud SLAs. By identifying and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Adnan:2021:ARS,
  author =       "Muhammad Adnan and Yassaman Ebrahimzadeh Maboud and
                 Divya Mahajan and Prashant J. Nair",
  title =        "Accelerating recommendation system training by
                 leveraging popular choices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "1",
  pages =        "127--140",
  month =        sep,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3485450.3485462",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jan 20 16:04:55 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3485450.3485462",
  abstract =     "Recommender models are commonly used to suggest
                 relevant items to a user for e-commerce and online
                 advertisement-based applications. These models use
                 massive embedding tables to store numerical
                 representation of items' and users' categorical
                 variables \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2021:BCE,
  author =       "Jianye Yang and Yun Peng and Wenjie Zhang",
  title =        "$ (p, q) $-biclique counting and enumeration for large
                 sparse bipartite graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "141--153",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489497",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489497",
  abstract =     "In this paper, we study the problem of (p,
                 q)-biclique counting and enumeration for large sparse
                 bipartite graphs. Given a bipartite graph G = (U, V,
                 E), and two integer parameters p and q, we aim to
                 efficiently count and enumerate all (p, q)-bicliques in G,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Graur:2021:EQL,
  author =       "Dan Graur and Ingo M{\"u}ller and Mason Proffitt and
                 Ghislain Fourny and Gordon T. Watts and Gustavo
                 Alonso",
  title =        "Evaluating query languages and systems for high-energy
                 physics data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "154--168",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489498",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489498",
  abstract =     "In the domain of high-energy physics (HEP), query
                 languages in general and SQL in particular have found
                 limited acceptance. This is surprising since HEP data
                 analysis matches the SQL model well: the data is fully
                 structured and queried using mostly \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hao:2021:DHC,
  author =       "Kongzhang Hao and Long Yuan and Wenjie Zhang",
  title =        "Distributed hop-constrained $s$--$t$ simple path
                 enumeration at billion scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "169--182",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489499",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489499",
  abstract =     "Hop-constrained s-t simple path (HC-s-t path)
                 enumeration is a fundamental problem in graph analysis
                 and has received considerable attention recently.
                 Straightforward distributed solutions are inefficient
                 and suffer from poor scalability when addressing
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fang:2021:EAO,
  author =       "Jingzhi Fang and Yanyan Shen and Yue Wang and Lei
                 Chen",
  title =        "{ETO}: accelerating optimization of {DNN} operators by
                 high-performance tensor program reuse",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "183--195",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489500",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489500",
  abstract =     "Recently, deep neural networks (DNNs) have achieved
                 great success in various applications, where low
                 inference latency is important. Existing solutions
                 either manually tune the kernel library or utilize
                 search-based compilation to reduce the operator
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Grulich:2021:BEE,
  author =       "Philipp Marian Grulich and Steffen Zeuch and Volker
                 Markl",
  title =        "{Babelfish}: efficient execution of polyglot queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "196--210",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489501",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489501",
  abstract =     "Today's users of data processing systems come from
                 different domains, have different levels of expertise,
                 and prefer different programming languages. As a
                 result, analytical workload requirements shifted from
                 relational to polyglot queries involving \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2021:BCU,
  author =       "Alexander Zhou and Yue Wang and Lei Chen",
  title =        "Butterfly counting on uncertain bipartite graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "211--223",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489502",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489502",
  abstract =     "When considering uncertain bipartite networks, the
                 number of instances of the popular graphlet structure
                 the butterfly may be used as an important metric to
                 quickly gauge information about the network. This
                 Uncertain Butterfly Count has practical usages
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cui:2021:MGG,
  author =       "Yue Cui and Kai Zheng and Dingshan Cui and Jiandong
                 Xie and Liwei Deng and Feiteng Huang and Xiaofang
                 Zhou",
  title =        "{METRO}: a generic graph neural network framework for
                 multivariate time series forecasting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "224--236",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489503",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489503",
  abstract =     "Multivariate time series forecasting has been drawing
                 increasing attention due to its prevalent applications.
                 It has been commonly assumed that leveraging latent
                 dependencies between pairs of variables can enhance
                 prediction accuracy. However, most \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ge:2021:LAE,
  author =       "Congcong Ge and Xiaoze Liu and Lu Chen and Yunjun Gao
                 and Baihua Zheng",
  title =        "{LargeEA}: aligning entities for large-scale knowledge
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "237--245",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489504",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489504",
  abstract =     "Entity alignment (EA) aims to find equivalent entities
                 in different knowledge graphs (KGs). Current EA
                 approaches suffer from scalability issues, limiting
                 their usage in real-world EA scenarios. To tackle this
                 challenge, we propose LargeEA to align \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2021:HHG,
  author =       "Kejing Lu and Mineichi Kudo and Chuan Xiao and
                 Yoshiharu Ishikawa",
  title =        "{HVS}: hierarchical graph structure based on {Voronoi}
                 diagrams for solving approximate nearest neighbor
                 search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "246--258",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489506",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489506",
  abstract =     "Approximate nearest neighbor search (ANNS) is a
                 fundamental problem that has a wide range of
                 applications in information retrieval and data mining.
                 Among state-of-the-art in-memory ANNS methods,
                 graph-based methods have attracted particular interest
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Arman:2021:OHP,
  author =       "Arif Arman and Dmitri Loguinov",
  title =        "{Origami}: a high-performance mergesort framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "259--271",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489507",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489507",
  abstract =     "Mergesort is a popular algorithm for sorting
                 real-world workloads as it is immune to data skewness,
                 suitable for parallelization using vectorized
                 intrinsics, and relatively simple to multi-thread. In
                 this paper, we introduce Origami, an in-memory merge-.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2021:LSL,
  author =       "Renzhi Wu and Bolin Ding and Xu Chu and Zhewei Wei and
                 Xiening Dai and Tao Guan and Jingren Zhou",
  title =        "Learning to be a statistician: learned estimator for
                 number of distinct values",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "272--284",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489508",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489508",
  abstract =     "Estimating the number of distinct values (NDV) in a
                 column is useful for many tasks in database systems,
                 such as columnstore compression and data profiling. In
                 this work, we focus on how to derive accurate NDV
                 estimations from random (online/offline) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yu:2021:PFP,
  author =       "Shangdi Yu and Yiqiu Wang and Yan Gu and Laxman
                 Dhulipala and Julian Shun",
  title =        "{ParChain}: a framework for parallel hierarchical
                 agglomerative clustering using nearest-neighbor chain",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "285--298",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489509",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489509",
  abstract =     "This paper studies the hierarchical clustering
                 problem, where the goal is to produce a dendrogram that
                 represents clusters at varying scales of a data set. We
                 propose the ParChain framework for designing parallel
                 hierarchical agglomerative clustering \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chauhan:2021:ARP,
  author =       "Komal Chauhan and Kartik Jain and Sayan Ranu and
                 Srikanta Bedathur and Amitabha Bagchi",
  title =        "Answering regular path queries through exemplars",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "299--311",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489510",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489510",
  abstract =     "Regular simple path query (RPQ) is one of the
                 fundamental operators in graph analytics. In an RPQ,
                 the input is a graph, a source node and a regular
                 expression. The goal is to identify all nodes that are
                 connected to the source through a simple path
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Miao:2021:SHE,
  author =       "Xupeng Miao and Hailin Zhang and Yining Shi and
                 Xiaonan Nie and Zhi Yang and Yangyu Tao and Bin Cui",
  title =        "{HET}: scaling out huge embedding model training via
                 cache-enabled distributed framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "312--320",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489511",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489511",
  abstract =     "Embedding models have been an effective learning
                 paradigm for high-dimensional data. However, one open
                 issue of embedding models is that their representations
                 (latent factors) often result in large parameter space.
                 We observe that existing distributed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:FFG,
  author =       "Pengfei Li and Yu Hua and Jingnan Jia and Pengfei
                 Zuo",
  title =        "{FINEdex}: a fine-grained learned index scheme for
                 scalable and concurrent memory systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "321--334",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489512",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489512",
  abstract =     "Index structures in memory systems become important to
                 improve the entire system performance. The promising
                 learned indexes leverage deep-learning models to
                 complement existing index structures and obtain
                 significant performance improvements. Existing
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bai:2021:TTA,
  author =       "Jiyang Bai and Peixiang Zhao",
  title =        "{TaGSim}: type-aware graph similarity learning and
                 computation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "335--347",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489513",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489513",
  abstract =     "Computing similarity between graphs is a fundamental
                 and critical problem in graph-based applications, and
                 one of the most commonly used graph similarity measures
                 is graph edit distance (GED), defined as the minimum
                 number of graph edit operations that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2021:AIC,
  author =       "Yuqing Zhu and Jing Tang and Xueyan Tang and Lei
                 Chen",
  title =        "Analysis of influence contribution in social
                 advertising",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "348--360",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489514",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489514",
  abstract =     "Online Social Network (OSN) providers usually conduct
                 advertising campaigns by inserting social ads into
                 promoted posts. Whenever a user engages in a promoted
                 ad, she may further propagate the promoted ad to her
                 followers recursively and the propagation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Theodorakis:2021:SSN,
  author =       "Georgios Theodorakis and Fotios Kounelis and Peter
                 Pietzuch and Holger Pirk",
  title =        "{Scabbard}: single-node fault-tolerant stream
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "361--374",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489515",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489515",
  abstract =     "Single-node multi-core stream processing engines
                 (SPEs) can process hundreds of millions of tuples per
                 second. Yet making them fault-tolerant with
                 exactly-once semantics while retaining this performance
                 is an open challenge: due to the limited I/O \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Konstantinidis:2021:EPC,
  author =       "George Konstantinidis and Jet Holt and Adriane
                 Chapman",
  title =        "Enabling personal consent in databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "2",
  pages =        "375--387",
  month =        oct,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3489496.3489516",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:26:54 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3489496.3489516",
  abstract =     "Users have the right to consent to the use of their
                 data, but current methods are limited to very
                 coarse-grained expressions of consent, as
                 ``opt-in/opt-out'' choices for certain uses. In this
                 paper we identify the need for fine-grained consent
                 management \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:ESB,
  author =       "Yejia Liu and Weiyuan Wu and Lampros Flokas and
                 Jiannan Wang and Eugene Wu",
  title =        "Enabling {SQL}-based training data debugging for
                 federated learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "388--400",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494125",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494125",
  abstract =     "How can we debug a logistic regression model in a
                 federated learning setting when seeing the model behave
                 unexpectedly (e.g., the model rejects all high-income
                 customers' loan applications)? The SQL-based training
                 data debugging framework has proved \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vaidya:2021:LQL,
  author =       "Kapil Vaidya and Anshuman Dutt and Vivek Narasayya and
                 Surajit Chaudhuri",
  title =        "Leveraging query logs and machine learning for
                 parametric query optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "401--413",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494126",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494126",
  abstract =     "Parametric query optimization (PQO) must address two
                 problems: identify a relatively small number of plans
                 to cache for a parameterized query (populateCache), and
                 efficiently select the best cached plan to use for
                 executing any instance of the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2021:PTS,
  author =       "Yao Lu and Srikanth Kandula and Arnd Christian
                 K{\"o}nig and Surajit Chaudhuri",
  title =        "Pre-training summarization models of structured
                 datasets for cardinality estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "414--426",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494127",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494127",
  abstract =     "We consider the problem of pre-training models which
                 convert structured datasets into succinct summaries
                 that can be used to answer cardinality estimation
                 queries. Doing so avoids per-dataset training and, in
                 our experiments, reduces the time to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rao:2021:XEF,
  author =       "Susie Xi Rao and Shuai Zhang and Zhichao Han and Zitao
                 Zhang and Wei Min and Zhiyao Chen and Yinan Shan and
                 Yang Zhao and Ce Zhang",
  title =        "{xFraud}: explainable fraud transaction detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "427--436",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494128",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494128",
  abstract =     "At online retail platforms, it is crucial to actively
                 detect the risks of transactions to improve customer
                 experience and minimize financial loss. In this work,
                 we propose xFraud, an explainable fraud transaction
                 prediction framework which is mainly \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yuan:2021:SMG,
  author =       "Ye Yuan and Delong Ma and Zhenyu Wen and Zhiwei Zhang
                 and Guoren Wang",
  title =        "Subgraph matching over graph federation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "437--450",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494129",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494129",
  abstract =     "Many real-life applications require processing graph
                 data across heterogeneous sources. In this paper, we
                 define the graph federation that indicates that the
                 graph data sources are temporarily federated and offer
                 their data for users. Next, we propose a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Niu:2021:PBD,
  author =       "Xing Niu and Boris Glavic and Ziyu Liu and Pengyuan Li
                 and Dieter Gawlick and Vasudha Krishnaswamy and Zhen
                 Hua Liu and Danica Porobic",
  title =        "Provenance-based data skipping",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "451--464",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494130",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494130",
  abstract =     "Database systems use static analysis to determine
                 upfront which data is needed for answering a query and
                 use indexes and other physical design techniques to
                 speed-up access to that data. However, for important
                 classes of queries, e.g., HAVING and top-k \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jin:2021:DTL,
  author =       "Di Jin and Bunyamin Sisman and Hao Wei and Xin Luna
                 Dong and Danai Koutra",
  title =        "Deep transfer learning for multi-source entity linkage
                 via domain adaptation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "465--477",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494131",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494131",
  abstract =     "Multi-source entity linkage focuses on integrating
                 knowledge from multiple sources by linking the records
                 that represent the same real world entity. This is
                 critical in high-impact applications such as data
                  cleaning and user stitching. The state-of-the-art
                  \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xing:2021:EEI,
  author =       "Lu Xing and Eric Lee and Tong An and Bo-Cheng Chu and
                 Ahmed Mahmood and Ahmed M. Aly and Jianguo Wang and
                 Walid G. Aref",
  title =        "An experimental evaluation and investigation of waves
                  of misery in $R$-trees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "478--490",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494132",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494132",
  abstract =     "Waves of misery is a phenomenon where spikes of many
                 node splits occur over short periods of time in tree
                 indexes. Waves of misery negatively affect the
                 performance of tree indexes in insertion-heavy
                 workloads. Waves of misery have been first observed
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:PPR,
  author =       "Yongyi Liu and Ahmed R. Mahmood and Amr Magdy and
                 Sergio Rey",
  title =        "{PRUC}: {P-regions} with user-defined constraint",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "491--503",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494133",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494133",
  abstract =     "This paper introduces a generalized spatial
                  regionalization problem, namely, PRUC (P-Regions with
                 User-defined Constraint) that partitions spatial areas
                 into homogeneous regions. PRUC accounts for
                 user-defined constraints imposed over aggregate region
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2021:PIR,
  author =       "Yile Chen and Xiucheng Li and Gao Cong and Cheng Long
                 and Zhifeng Bao and Shang Liu and Wanli Gu and Fuzheng
                 Zhang",
  title =        "Points-of-interest relationship inference with
                 spatial-enriched graph neural networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "504--512",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494134",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494134",
  abstract =     "As a fundamental component in location-based services,
                 inferring the relationship between points-of-interests
                 (POIs) is very critical for service providers to offer
                 good user experience to business owners and customers.
                 Most of the existing methods for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chan:2021:SSA,
  author =       "Tsz Nam Chan and Pak Lon Ip and Leong Hou U. and Byron
                 Choi and Jianliang Xu",
  title =        "{SAFE}: a share-and-aggregate bandwidth exploration
                 framework for kernel density visualization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "513--526",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494135",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494135",
  abstract =     "Kernel density visualization (KDV) has been the de
                 facto method in many spatial analysis tasks, including
                 ecological modeling, crime hotspot detection, traffic
                 accident hotspot detection, and disease outbreak
                 detection. In these tasks, domain experts \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dittrich:2021:NYD,
  author =       "Jens Dittrich and Joris Nix and Christian Sch{\"o}n",
  title =        "The next 50 years in database indexing or: the case
                 for automatically generated index structures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "527--540",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494136",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494136",
  abstract =     "Index structures are a building block of query
                 processing and computer science in general. Since the
                 dawn of computer technology there have been index
                 structures. And since then, a myriad of index
                 structures are being invented and published each and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chapnik:2021:DDA,
  author =       "Koral Chapnik and Ilya Kolchinsky and Assaf Schuster",
  title =        "{DARLING}: data-aware load shedding in complex event
                 processing systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "541--554",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494137",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494137",
  abstract =     "Complex event processing (CEP) is widely employed to
                 detect user-defined combinations, or patterns, of
                 events in massive streams of incoming data. Numerous
                 applications such as healthcare, fraud detection, and
                 more, use CEP technologies to capture \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhuo:2021:RMO,
  author =       "Danyang Zhuo and Kaiyuan Zhang and Zhuohan Li and
                 Siyuan Zhuang and Stephanie Wang and Ang Chen and Ion
                 Stoica",
  title =        "Rearchitecting in-memory object stores for low
                 latency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "555--568",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494138",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494138",
  abstract =     "Low latency is increasingly critical for modern
                 workloads, to the extent that compute functions are
                 explicitly scheduled to be co-located with their
                 in-memory object stores for faster access. However, the
                 traditional object store architecture mandates
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ma:2021:MTE,
  author =       "Pingchuan Ma and Shuai Wang",
  title =        "{MT-teql}: evaluating and augmenting neural {NLIDB} on
                 real-world linguistic and schema variations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "569--582",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494139",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494139",
  abstract =     "Natural Language Interface to Database (NLIDB)
                 translates human utterances into SQL queries and
                 enables database interactions for non-expert users.
                 Recently, neural network models have become a major
                 approach to implementing NLIDB. However, neural NLIDB
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shi:2021:TPE,
  author =       "Jessica Shi and Laxman Dhulipala and Julian Shun",
  title =        "Theoretically and practically efficient parallel
                 nucleus decomposition",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "583--596",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494140",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494140",
  abstract =     "This paper studies the nucleus decomposition problem,
                 which has been shown to be useful in finding dense
                 substructures in graphs. We present a novel parallel
                 algorithm that is efficient both in theory and in
                 practice. Our algorithm achieves a work \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2021:AHP,
  author =       "Baotong Lu and Jialin Ding and Eric Lo and Umar Farooq
                 Minhas and Tianzheng Wang",
  title =        "{APEX}: a high-performance learned index on persistent
                 memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "597--610",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494141",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494141",
  abstract =     "The recently released persistent memory (PM) offers
                 high performance, persistence, and is cheaper than
                 DRAM. This opens up new possibilities for indexes that
                 operate and persist data directly on the memory bus.
                 Recent learned indexes exploit data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Campos:2021:UTS,
  author =       "David Campos and Tung Kieu and Chenjuan Guo and
                 Feiteng Huang and Kai Zheng and Bin Yang and Christian
                 S. Jensen",
  title =        "Unsupervised time series outlier detection with
                 diversity-driven convolutional ensembles",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "611--623",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494142",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494142",
  abstract =     "With the sweeping digitalization of societal, medical,
                 industrial, and scientific processes, sensing
                 technologies are being deployed that produce increasing
                 volumes of time series data, thus fueling a plethora of
                 new or improved applications. In this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Miao:2021:EED,
  author =       "Xiaoye Miao and Yangyang Wu and Lu Chen and Yunjun Gao
                 and Jun Wang and Jianwei Yin",
  title =        "Efficient and effective data imputation with influence
                 functions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "624--632",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494143",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494143",
  abstract =     "Data imputation has been extensively explored to solve
                 the missing data problem. The dramatically rising
                 volume of missing data makes the training of imputation
                 models computationally infeasible in real-life
                 scenarios. In this paper, we propose an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kochsiek:2021:PTK,
  author =       "Adrian Kochsiek and Rainer Gemulla",
  title =        "Parallel training of knowledge graph embedding models:
                 a comparison of techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "633--645",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494144",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494144",
  abstract =     "Knowledge graph embedding (KGE) models represent the
                 entities and relations of a knowledge graph (KG) using
                 dense continuous representations called embeddings. KGE
                 methods have recently gained traction for tasks such as
                 knowledge graph completion and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vitagliano:2021:DLT,
  author =       "Gerardo Vitagliano and Lan Jiang and Felix Naumann",
  title =        "Detecting layout templates in complex multiregion
                 files",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "646--658",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494145",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494145",
  abstract =     "Spreadsheets are among the most commonly used file
                 formats for data management, distribution, and
                 analysis. Their widespread employment makes it easy to
                 gather large collections of data, but their flexible
                 canvas-based structure makes automated analysis
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Maliszewski:2021:WPJ,
  author =       "Kajetan Maliszewski and Jorge-Arnulfo Quian{\'e}-Ruiz
                 and Jonas Traub and Volker Markl",
  title =        "What is the price for joining securely?: benchmarking
                 equi-joins in trusted execution environments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "659--672",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494146",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494146",
  abstract =     "Protection of personal data has been raised to be
                 among the top requirements of modern systems. At the
                 same time, it is now frequent that the owner of the
                 data and the owner of the computing infrastructure are
                 two entities with limited trust between \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ho:2021:ETP,
  author =       "Van Long Ho and Nguyen Ho and Torben Bach Pedersen",
  title =        "Efficient temporal pattern mining in big time series
                 using mutual information",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "673--685",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494147",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494147",
  abstract =     "Very large time series are increasingly available from
                 an ever wider range of IoT-enabled sensors deployed in
                 different environments. Significant insights can be
                 gained by mining temporal patterns from these time
                 series. Unlike traditional pattern \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:ELC,
  author =       "Junhua Zhang and Long Yuan and Wentao Li and Lu Qin
                 and Ying Zhang",
  title =        "Efficient label-constrained shortest path queries on
                 road networks: a tree decomposition approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "686--698",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494148",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494148",
  abstract =     "Computing the shortest path between two vertices is a
                 fundamental problem in road networks. Most of the
                 existing works assume that the edges in the road
                 networks have no labels, but in many real applications,
                 the edges have labels and label constraints \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Suri:2021:ENC,
  author =       "Sahaana Suri and Ihab F. Ilyas and Christopher R{\'e}
                 and Theodoros Rekatsinas",
  title =        "{Ember}: no-code context enrichment via
                 similarity-based keyless joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "699--712",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494149",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494149",
  abstract =     "Structured data, or data that adheres to a pre-defined
                 schema, can suffer from fragmented context: information
                 describing a single entity can be scattered across
                 multiple datasets or tables tailored for specific
                 business needs, with no explicit linking \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vu:2021:IPE,
  author =       "Tin Vu and Ahmed Eldawy and Vagelis Hristidis and
                 Vassilis Tsotras",
  title =        "Incremental partitioning for efficient spatial data
                 analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "713--726",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494150",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494150",
  abstract =     "Big spatial data has become ubiquitous, from mobile
                 applications to satellite data. In most of these
                 applications, data is continuously growing to huge
                 volumes. Existing systems for big spatial data organize
                  records at either the record-level or block-level
                  \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2021:LAV,
  author =       "Doris Jung-Lin Lee and Dixin Tang and Kunal Agarwal
                 and Thyne Boonmark and Caitlyn Chen and Jake Kang and
                 Ujjaini Mukhopadhyay and Jerry Song and Micah Yong and
                 Marti A. Hearst and Aditya G. Parameswaran",
  title =        "{Lux}: always-on visualization recommendations for
                 exploratory dataframe workflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "727--738",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494151",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494151",
  abstract =     "Exploratory data science largely happens in
                 computational notebooks with dataframe APIs, such as
                 pandas, that support flexible means to transform,
                 clean, and analyze data. Yet, visually exploring data
                 in dataframes remains tedious, requiring substantial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Petersohn:2021:FRB,
  author =       "Devin Petersohn and Dixin Tang and Rehan Durrani and
                 Areg Melik-Adamyan and Joseph E. Gonzalez and Anthony
                 D. Joseph and Aditya G. Parameswaran",
  title =        "Flexible rule-based decomposition and metadata
                  independence in {Modin}: a parallel dataframe system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "3",
  pages =        "739--751",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3494124.3494152",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Feb 5 06:35:56 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3494124.3494152",
  abstract =     "Dataframes have become universally popular as a means
                 to represent data in various stages of structure, and
                 manipulate it using a rich set of operators---thereby
                 becoming an essential tool in the data scientists'
                 toolbox. However, dataframe systems, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Han:2021:CED,
  author =       "Yuxing Han and Ziniu Wu and Peizhi Wu and Rong Zhu and
                 Jingyi Yang and Liang Wei Tan and Kai Zeng and Gao Cong
                 and Yanzhao Qin and Andreas Pfadler and Zhengping Qian
                 and Jingren Zhou and Jiangneng Li and Bin Cui",
  title =        "Cardinality estimation in {DBMS}: a comprehensive
                 benchmark evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "752--765",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503586",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503586",
  abstract =     "Cardinality estimation (CardEst) plays a significant
                 role in generating high-quality query plans for a query
                 optimizer in DBMS. In the last decade, an increasing
                 number of advanced CardEst methods (especially
                 ML-based) have been proposed with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2021:RRD,
  author =       "Qizhen Zhang and Philip A. Bernstein and Daniel S.
                 Berger and Badrish Chandramouli",
  title =        "{Redy}: remote dynamic memory cache",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "766--779",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503587",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503587",
  abstract =     "Redy is a cloud service that provides high performance
                 caches using RDMA-accessible remote memory. An
                 application can customize the performance of each cache
                 with a service level objective (SLO) for latency and
                 throughput. By using remote memory, it can \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Boissier:2021:RBC,
  author =       "Martin Boissier",
  title =        "Robust and budget-constrained encoding configurations
                 for in-memory database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "780--793",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503588",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503588",
  abstract =     "Data encoding has been applied to database systems for
                 decades as it mitigates bandwidth bottlenecks and
                 reduces storage requirements. But even in the presence
                 of these advantages, most in-memory database systems
                 use data encoding only conservatively as \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tan:2021:FNR,
  author =       "Shulong Tan and Weijie Zhao and Ping Li",
  title =        "Fast neural ranking on bipartite graph indices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "794--803",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503589",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503589",
  abstract =     "Neural network based ranking has been widely adopted
                 owing to its powerful capacity in modeling complex
                 relationships (e.g., users and items, questions and
                 answers). Online neural network ranking, i.e., the so
                 called fast neural ranking, is considered a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gan:2021:BSD,
  author =       "Shaoduo Gan and Jiawei Jiang and Binhang Yuan and Ce
                 Zhang and Xiangru Lian and Rui Wang and Jianbin Chang
                 and Chengjun Liu and Hongmei Shi and Shengzhuo Zhang
                 and Xianghong Li and Tengxu Sun and Sen Yang and Ji
                 Liu",
  title =        "{Bagua}: scaling up distributed learning with system
                 relaxations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "804--813",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503590",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503590",
  abstract =     "Recent years have witnessed a growing list of systems
                 for distributed data-parallel training. Existing
                 systems largely fit into two paradigms, i.e., parameter
                 server and MPI-style collective operations. On the
                 algorithmic side, researchers have proposed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chan:2021:SCO,
  author =       "Tsz Nam Chan and Pak Lon Ip and Leong Hou U. and Byron
                 Choi and Jianliang Xu",
  title =        "{SWS}: a complexity-optimized solution for
                 spatial-temporal kernel density visualization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "814--827",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503591",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503591",
  abstract =     "Spatial-temporal kernel density visualization (STKDV)
                 has been extensively used in a wide range of
                 applications, e.g., disease outbreak analysis, traffic
                 accident hotspot detection, and crime hotspot
                 detection. While STKDV can provide accurate and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2021:PFA,
  author =       "Junxu Liu and Jian Lou and Li Xiong and Jinfei Liu and
                 Xiaofeng Meng",
  title =        "Projected federated averaging with heterogeneous
                 differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "828--840",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503592",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503592",
  abstract =     "Federated Learning (FL) is a promising framework for
                 multiple clients to learn a joint model without
                 directly sharing the data. In addition to high utility
                 of the joint model, rigorous privacy protection of the
                 data and communication efficiency are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Haimovich:2021:PPS,
  author =       "Daniel Haimovich and Dima Karamshuk and Thomas J.
                 Leeper and Evgeniy Riabenko and Milan Vojnovic",
  title =        "Popularity prediction for social media over arbitrary
                 time horizons",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "841--849",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503593",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503593",
  abstract =     "Predicting the popularity of social media content in
                 real time requires approaches that efficiently operate
                 at global scale. Popularity prediction is important for
                 many applications, including detection of harmful viral
                 content to enable timely content \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Doshi:2021:LWS,
  author =       "Ishita Doshi and Dhritiman Das and Ashish Bhutani and
                 Rajeev Kumar and Rushi Bhatt and Niranjan
                 Balasubramanian",
  title =        "{LANNS}: a web-scale approximate nearest neighbor
                 lookup system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "850--858",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503594",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503594",
  abstract =     "Nearest neighbor search (NNS) has a wide range of
                 applications in information retrieval, computer vision,
                 machine learning, databases, and other areas. Existing
                 state-of-the-art algorithm for nearest neighbor search,
                 Hierarchical Navigable Small World \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pena:2021:FDD,
  author =       "Eduardo H. M. Pena and Eduardo C. de Almeida and Felix
                 Naumann",
  title =        "Fast detection of denial constraint violations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "859--871",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503595",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503595",
  abstract =     "The detection of constraint-based errors is a critical
                 task in many data cleaning solutions. Previous works
                 perform the task either using traditional data
                 management systems or using specialized systems that
                 speed up error detection. Unfortunately, both
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yu:2021:CFF,
  author =       "Bowen Yu and Guanyu Feng and Huanqi Cao and Xiaohan Li
                 and Zhenbo Sun and Haojie Wang and Xiaowei Zhu and
                 Weimin Zheng and Wenguang Chen",
  title =        "{Chukonu}: a fully-featured high-performance big data
                 framework that integrates a native compute engine into
                 {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "872--885",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503596",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503596",
  abstract =     "Apache Spark is a widely deployed big data analytics
                 framework that offers such attractive features as
                 resiliency, load-balancing, and a rich ecosystem.
                 However, there is still plenty of room for improvement
                 in its performance. Although a data-parallel \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jin:2021:CNM,
  author =       "Sian Jin and Chengming Zhang and Xintong Jiang and
                 Yunhe Feng and Hui Guan and Guanpeng Li and Shuaiwen
                 Leon Song and Dingwen Tao",
  title =        "{COMET}: a novel memory-efficient deep learning
                 training framework by using error-bounded lossy
                 compression",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "886--899",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503597",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503597",
  abstract =     "Deep neural networks (DNNs) are becoming increasingly
                 deeper, wider, and non-linear due to the growing
                 demands on prediction accuracy and analysis quality.
                 Training wide and deep neural networks require large
                 amounts of storage resources such as memory \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2021:FMF,
  author =       "Zitao Li and Bolin Ding and Ce Zhang and Ninghui Li
                 and Jingren Zhou",
  title =        "Federated matrix factorization with privacy
                 guarantee",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "900--913",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503598",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503598",
  abstract =     "Matrix factorization (MF) approximates unobserved
                 ratings in a rating matrix, whose rows correspond to
                 users and columns correspond to items to be rated, and
                 has been serving as a fundamental building block in
                 recommendation systems. This paper \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Duong:2021:SRG,
  author =       "Chi Thang Duong and Trung Dung Hoang and Hongzhi Yin
                 and Matthias Weidlich and Quoc Viet Hung Nguyen and
                 Karl Aberer",
  title =        "Scalable robust graph embedding with {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "914--922",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503599",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503599",
  abstract =     "Graph embedding aims at learning a vector-based
                 representation of vertices that incorporates the
                 structure of the graph. This representation then
                 enables inference of graph properties. Existing graph
                 embedding techniques, however, do not scale well to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Paul:2021:DWC,
  author =       "Debjyoti Paul and Jie Cao and Feifei Li and Vivek
                 Srikumar",
  title =        "Database workload characterization with query plan
                 encoders",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "923--935",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503600",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503600",
  abstract =     "Smart databases are adopting artificial intelligence
                 (AI) technologies to achieve instance optimality, and
                 in the future, databases will come with prepackaged AI
                 models within their core components. The reason is that
                 every database runs on different \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Modi:2021:NQO,
  author =       "Abhishek Modi and Kaushik Rajan and Srinivas Thimmaiah
                 and Prakhar Jain and Swinky Mann and Ayushi Agarwal and
                 Ajith Shetty and Shahid K. I. and Ashit Gosalia and
                 Partho Sarthi",
  title =        "New query optimization techniques in the {Spark}
                 engine of {Azure} synapse",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "936--948",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503601",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503601",
  abstract =     "The cost of big-data query execution is dominated by
                 stateful operators. These include sort and
                 hash-aggregate that typically materialize intermediate
                 data in memory, and exchange that materializes data to
                 disk and transfers data over the network. In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sinthong:2021:DDQ,
  author =       "Phanwadee Sinthong and Dhaval Patel and Nianjun Zhou
                 and Shrey Shrivastava and Arun Iyengar and Anuradha
                 Bhamidipaty",
  title =        "{DQDF}: data-quality-aware dataframes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "949--957",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503602",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503602",
  abstract =     "Data quality assessment is an essential process of any
                 data analysis process including machine learning. The
                 process is time-consuming as it involves multiple
                 independent data quality checks that are performed
                 iteratively at scale on evolving data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Agarwal:2021:RGC,
  author =       "Archita Agarwal and Marilyn George and Aaron Jeyaraj
                 and Malte Schwarzkopf",
  title =        "Retrofitting {GDPR} compliance onto legacy databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "958--970",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503603",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503603",
  abstract =     "New privacy laws like the European Union's General
                 Data Protection Regulation (GDPR) require database
                 administrators (DBAs) to identify all information
                 related to an individual on request, e.g., to return or
                 delete it. This requires time-consuming \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2021:AAC,
  author =       "Xinle Wu and Dalin Zhang and Chenjuan Guo and Chaoyang
                 He and Bin Yang and Christian S. Jensen",
  title =        "{AutoCTS}: automated correlated time series
                 forecasting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "971--983",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503604",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503604",
  abstract =     "Correlated time series (CTS) forecasting plays an
                 essential role in many cyber-physical systems, where
                 multiple sensors emit time series that capture
                 interconnected processes. Solutions based on deep
                 learning that deliver state-of-the-art CTS \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sudhir:2021:RLM,
  author =       "Sivaprasad Sudhir and Michael Cafarella and Samuel
                 Madden",
  title =        "Replicated layout for in-memory database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "4",
  pages =        "984--997",
  month =        dec,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3503585.3503606",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Apr 15 06:48:40 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3503585.3503606",
  abstract =     "Scanning and filtering are the foundations of
                 analytical database systems. Modern DBMSs employ a
                 variety of techniques to partition and layout data to
                 improve the performance of these operations. To
                 accelerate query performance, systems tune data layout
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sanghi:2022:PCD,
  author =       "Anupam Sanghi and Shadab Ahmed and Jayant R. Haritsa",
  title =        "Projection-compliant database generation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "998--1010",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510398",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510398",
  abstract =     "Synthesizing data using declarative formalisms has
                 been persuasively advocated in contemporary data
                 generation frameworks. In particular, they specify
                 operator output volumes through row-cardinality
                 constraints. However, thus far, adherence to these
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jin:2022:MRE,
  author =       "Guodong Jin and Semih Salihoglu",
  title =        "Making {RDBMSs} efficient on graph workloads through
                 predefined joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1011--1023",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510400",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510400",
  abstract =     "Joins in native graph database management systems
                 (GDBMSs) are predefined to the system as edges, which
                 are indexed in adjacency list indices and serve as
                 pointers. This contrasts with and can be more
                 performant than value-based joins in RDBMSs. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Deep:2022:REJ,
  author =       "Shaleen Deep and Xiao Hu and Paraschos Koutris",
  title =        "Ranked enumeration of join queries with projections",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1024--1037",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510401",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510401",
  abstract =     "Join query evaluation with ordering is a fundamental
                 data processing task in relational database management
                 systems. SQL and custom graph query languages such as
                 Cypher offer this functionality by allowing users to
                 specify the order via the ORDER BY \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shin:2022:HSC,
  author =       "Ahnjae Shin and Joo Seong Jeong and Do Yoon Kim and
                 Soyoung Jung and Byung-Gon Chun",
  title =        "{Hippo}: sharing computations in hyper-parameter
                 optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1038--1052",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510402",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510402",
  abstract =     "Hyper-parameter optimization is crucial for pushing
                 the accuracy of a deep learning model to its limits.
                 However, a hyper-parameter optimization job, referred
                 to as a study, involves numerous trials of training a
                 model using different training knobs, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rinberg:2022:DJC,
  author =       "Arik Rinberg and Tomer Solomon and Roee Shlomo and Guy
                 Khazma and Gal Lushi and Idit Keidar and Paula
                 Ta-Shma",
  title =        "{DSON}: {JSON CRDT} using delta-mutations for document
                 stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1053--1065",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510403",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510403",
  abstract =     "We propose DSON, a space efficient $ \delta $-based
                 CRDT approach for distributed JSON document stores,
                 enabling high availability at a global scale, while
                 providing strong eventual consistency guarantees. We
                 define the semantics of our CRDT based approach
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zeighami:2022:NDD,
  author =       "Sepanta Zeighami and Ritesh Ahuja and Gabriel Ghinita
                 and Cyrus Shahabi",
  title =        "A neural database for differentially private spatial
                 range queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1066--1078",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510404",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510404",
  abstract =     "Mobile apps and location-based services generate large
                 amounts of location data. Location density information
                 from such datasets benefits research on traffic
                 optimization, context-aware notifications and public
                 health (e.g., disease spread). To preserve \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Maltry:2022:CAR,
  author =       "Marcel Maltry and Jens Dittrich",
  title =        "A critical analysis of recursive model indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1079--1091",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510405",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510405",
  abstract =     "The recursive model index (RMI) has recently been
                 introduced as a machine-learned replacement for
                 traditional indexes over sorted data, achieving
                 remarkably fast lookups. Follow-up work focused on
                 explaining RMI's performance and automatically
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ge:2022:HBD,
  author =       "Zerui Ge and Dumitrel Loghin and Beng Chin Ooi and
                 Pingcheng Ruan and Tianwen Wang",
  title =        "Hybrid blockchain database systems: design and
                 performance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1092--1104",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510406",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510406",
  abstract =     "With the emergence of hybrid blockchain database
                 systems, we aim to provide an in-depth analysis of the
                 performance and trade-offs among a few representative
                 systems. To achieve this goal, we implement Veritas and
                 BlockchainDB from scratch. For Veritas, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bonifati:2022:TQT,
  author =       "Angela Bonifati and Stefania Dumbrava and George
                 Fletcher and Jan Hidders and Matthias Hofer and Wim
                 Martens and Filip Murlak and Joshua Shinavier and
                 S{\l}awek Staworko and Dominik Tomaszuk",
  title =        "Threshold queries in theory and in the wild",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1105--1118",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510407",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510407",
  abstract =     "Threshold queries are an important class of queries
                 that only require computing or counting answers up to a
                 specified threshold value. To the best of our
                 knowledge, threshold queries have been largely
                 disregarded in the research literature, which is
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sichert:2022:UDO,
  author =       "Moritz Sichert and Thomas Neumann",
  title =        "User-defined operators: efficiently integrating custom
                 algorithms into modern databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "5",
  pages =        "1119--1131",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3510397.3510408",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed May 25 08:14:25 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3510397.3510408",
  abstract =     "In recent years, complex data mining and machine
                 learning algorithms have become more common in data
                 analytics. Several specialized systems exist to
                 evaluate these algorithms on ever-growing data sets,
                 which are built to efficiently execute different
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:PEP,
  author =       "Yue Wang and Vivek Narasayya and Yeye He and Surajit
                 Chaudhuri",
  title =        "{PACk}: an efficient partition-based distributed
                 agglomerative hierarchical clustering algorithm for
                 deduplication",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1132--1145",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514062",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514062",
  abstract =     "The Agglomerative Hierarchical Clustering (AHC)
                 algorithm is widely used in real-world applications. As
                 data volumes continue to grow, efficient scale-out
                 techniques for AHC are becoming increasingly important.
                 In this paper, we propose a Partition-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chang:2022:NOA,
  author =       "Lijun Chang and Zhiyi Wang",
  title =        "A near-optimal approach to edge connectivity-based
                 hierarchical graph decomposition",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1146--1158",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514063",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514063",
  abstract =     "Driven by applications in graph analytics, the problem
                 of efficiently computing all $k$-edge connected
                 components ($k$-ECCs) of a graph $G$ for a user-given
                 $k$ has been extensively and well studied. It is known
                 that the $k$-ECCs of $G$ for all possible values of $k$
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tong:2022:HFE,
  author =       "Yongxin Tong and Xuchen Pan and Yuxiang Zeng and
                 Yexuan Shi and Chunbo Xue and Zimu Zhou and Xiaofei
                 Zhang and Lei Chen and Yi Xu and Ke Xu and Weifeng Lv",
  title =        "{Hu-Fu}: efficient and secure spatial queries over
                 data federation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1159--1172",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514064",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514064",
  abstract =     "Data isolation has become an obstacle to scale up
                 query processing over big data, since sharing raw data
                 among data owners is often prohibitive due to security
                 concerns. A promising solution is to perform secure
                 queries over a federation of multiple \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fuchs:2022:SUT,
  author =       "Per Fuchs and Domagoj Margan and Jana Giceva",
  title =        "{Sortledton}: a universal, transactional graph data
                 structure",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1173--1186",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514065",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514065",
  abstract =     "Despite the wide adoption of graph processing across
                 many different application domains, there is no
                 underlying data structure that can serve a variety of
                 graph workloads (analytics, traversals, and pattern
                 matching) on dynamic graphs with transactional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:NLF,
  author =       "Bowen Zhang and Shengan Zheng and Zhenlin Qi and
                 Linpeng Huang",
  title =        "{NBTree}: a lock-free {PM}-friendly persistent
                 {B+}-tree for {eADR}-enabled {PM} systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1187--1200",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514066",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514066",
  abstract =     "Persistent memory (PM) promises near-DRAM performance
                 as well as data persistency. Recently, a new feature
                 called eADR is available on the 2$^{nd}$ generation
                 Intel Optane PM with the 3$^{rd}$ generation Intel Xeon
                 Scalable Processors. eADR ensures that data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tuli:2022:TDT,
  author =       "Shreshth Tuli and Giuliano Casale and Nicholas R.
                 Jennings",
  title =        "{TranAD}: deep transformer networks for anomaly
                 detection in multivariate time series data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1201--1214",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514067",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514067",
  abstract =     "Efficient anomaly detection and diagnosis in
                 multivariate time-series data is of great importance
                 for modern industrial applications. However, building a
                 system that is able to quickly and accurately pinpoint
                 anomalous observations is a challenging \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2022:SPO,
  author =       "Fuheng Zhao and Divyakant Agrawal and Amr {El Abbadi}
                 and Ahmed Metwally",
  title =        "{SpaceSaving$ \pm $}: an optimal algorithm for
                 frequency estimation and frequent items in the
                 bounded-deletion model",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1215--1227",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514068",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See errata \cite{Zhao:2023:ESO}.",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514068",
  abstract =     "In this paper, we propose the first deterministic
                 algorithms to solve the frequency estimation and
                 frequent item problems in the bounded-deletion model.
                 We establish the space lower bound for solving the
                 deterministic frequent items problem in the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zheng:2022:BEG,
  author =       "Chenguang Zheng and Hongzhi Chen and Yuxuan Cheng and
                 Zhezheng Song and Yifan Wu and Changji Li and James
                 Cheng and Hao Yang and Shuai Zhang",
  title =        "{ByteGNN}: efficient graph neural network training at
                 large scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1228--1242",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514069",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514069",
  abstract =     "Graph neural networks (GNNs) have shown excellent
                 performance in a wide range of applications such as
                 recommendation, risk control, and drug discovery. With
                 the increase in the volume of graph data, distributed
                 GNN systems become essential to support \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jiang:2022:QDG,
  author =       "Yuli Jiang and Yu Rong and Hong Cheng and Xin Huang
                 and Kangfei Zhao and Junzhou Huang",
  title =        "Query driven-graph neural networks for community
                 search: from non-attributed, attributed, to interactive
                 attributed",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1243--1255",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514070",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514070",
  abstract =     "Given one or more query vertices, Community Search
                 (CS) aims to find densely intra-connected and loosely
                 inter-connected structures containing query vertices.
                 Attributed Community Search (ACS), a related problem,
                 is more challenging since it finds \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2022:HTT,
  author =       "Yang Li and Yu Shen and Huaijun Jiang and Wentao Zhang
                 and Jixiang Li and Ji Liu and Ce Zhang and Bin Cui",
  title =        "{Hyper-Tune}: towards efficient hyper-parameter
                 tuning at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1256--1265",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514071",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514071",
  abstract =     "The ever-growing demand and complexity of machine
                 learning are putting pressure on hyper-parameter tuning
                 systems: while the evaluation cost of models continues
                 to increase, the scalability of state-of-the-arts
                 starts to become a crucial bottleneck. In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Minartz:2022:MCD,
  author =       "Koen Minartz and Jens E. d'Hondt and Odysseas
                 Papapetrou",
  title =        "Multivariate correlations discovery in static and
                 streaming data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1266--1278",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514072",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514072",
  abstract =     "Correlation analysis is an invaluable tool in many
                 domains, for better understanding data and extracting
                 salient insights. Most works to date focus on detecting
                 high pairwise correlations. A generalization of this
                 problem with known applications but no \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Poppe:2022:MPA,
  author =       "Olga Poppe and Qun Guo and Willis Lang and Pankaj
                 Arora and Morgan Oslake and Shize Xu and Ajay Kalhan",
  title =        "{Moneyball}: proactive auto-scaling in {Microsoft
                 Azure SQL} database serverless",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1279--1287",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514073",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514073",
  abstract =     "Microsoft Azure SQL Database is among the leading
                 relational database service providers in the cloud.
                 Serverless compute automatically scales resources based
                 on workload demand. When a database becomes idle its
                 resources are reclaimed. When activity \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cheng:2022:PRP,
  author =       "Kewei Cheng and Xian Li and Yifan Ethan Xu and Xin
                 Luna Dong and Yizhou Sun",
  title =        "{PGE}: robust product graph embedding learning for
                 error detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1288--1296",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514074",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514074",
  abstract =     "Although product graphs (PGs) have gained increasing
                 attentions in recent years for their successful
                 applications in product search and recommendations, the
                 extensive power of PGs can be limited by the inevitable
                 involvement of various kinds of errors. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Manne:2022:CMR,
  author =       "Naga Nithin Manne and Shilvi Satpati and Tanu Malik
                 and Amitabha Bagchi and Ashish Gehani and Amitabh
                 Chaudhary",
  title =        "{CHEX}: multiversion replay with ordered checkpoints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "6",
  pages =        "1297--1310",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3514061.3514075",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3514061.3514075",
  abstract =     "In scientific computing and data science disciplines,
                 it is often necessary to share application workflows
                 and repeat results. Current tools containerize
                 application workflows, and share the resulting
                 container for repeating results. These tools, due to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Even:2022:PFP,
  author =       "Tomer Even and Guy Even and Adam Morrison",
  title =        "Prefix filter: practically and theoretically better
                 than {Bloom}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1311--1323",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523211",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523211",
  abstract =     "Many applications of approximate membership query data
                 structures, or filters, require only an incremental
                 filter that supports insertions but not deletions.
                 However, the design space of incremental filters is
                 missing a ``sweet spot'' filter that combines
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yamada:2022:SDS,
  author =       "Hiroyuki Yamada and Jun Nemoto",
  title =        "{Scalar DL}: scalable and practical {Byzantine} fault
                 detection for transactional database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1324--1336",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523212",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523212",
  abstract =     "This paper presents Scalar DL, a Byzantine fault
                 detection (BFD) middleware for transactional database
                 systems. Scalar DL manages two separately administered
                 database replicas in a database system and can detect
                 Byzantine faults in the database system \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kim:2022:NLR,
  author =       "Gyuyeong Kim and Wonjun Lee",
  title =        "In-network leaderless replication for distributed data
                 stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1337--1349",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523213",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523213",
  abstract =     "Leaderless replication allows any replica to handle
                 any type of request to achieve read scalability and
                 high availability for distributed data stores. However,
                 this entails burdensome coordination overhead of
                 replication protocols, degrading write \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2022:FAC,
  author =       "Xin Sun and Xin Huang and Di Jin",
  title =        "Fast algorithms for core maximization on large
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1350--1362",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523214",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523214",
  abstract =     "Core maximization, that enlarges the $k$-core as much
                 as possible by inserting a few new edges into a graph,
                 is particularly useful for social group engagement and
                 network stability improvement. However, the core
                 maximization problem has been \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pan:2022:NSC,
  author =       "Shuye Pan and Peng Wang and Chen Wang and Wei Wang and
                 Jianmin Wang",
  title =        "{NLC}: search correlated window pairs on long time
                 series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1363--1375",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523215",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523215",
  abstract =     "Nowadays, many applications, like Internet of Things
                 and Industrial Internet, collect data points from
                 sensors continuously to form long time series. Finding
                 correlation between time series is a fundamental task
                 for many time series mining problems. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:EBL,
  author =       "Hanzhi Wang and Zhewei Wei and Junhao Gan and Ye Yuan
                 and Xiaoyong Du and Ji-Rong Wen",
  title =        "Edge-based local push for personalized {PageRank}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1376--1389",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523216",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523216",
  abstract =     "Personalized PageRank (PPR) is a popular node
                 proximity metric in graph mining and network research.
                 A single-source PPR (SSPPR) query asks for the PPR
                 value of each node on the graph. Due to its importance
                 and wide applications, decades of efforts have
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chan:2022:CSD,
  author =       "Harry Kai-Ho Chan and Huan Li and Xiao Li and Hua Lu",
  title =        "Continuous social distance monitoring in indoor
                 space",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1390--1402",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523217",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523217",
  abstract =     "The COVID-19 pandemic has caused over 6 million deaths
                 since 2020. To contain the spread of the virus, social
                 distancing is one of the most simple yet effective
                 approaches. Motivated by this, in this paper we study
                 the problem of continuous social \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2022:DSC,
  author =       "Xibo Sun and Shixuan Sun and Qiong Luo and Bingsheng
                 He",
  title =        "An in-depth study of continuous subgraph matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1403--1416",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523218",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523218",
  abstract =     "Continuous subgraph matching (CSM) algorithms find the
                 occurrences of a given pattern on a stream of data
                 graphs online. A number of incremental CSM algorithms
                 have been proposed. However, a systematical study on
                 these algorithms is missing to identify \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mishra:2022:OST,
  author =       "Abhinav Mishra and Ram Sriharsha and Sichen Zhong",
  title =        "{OnlineSTL}: scaling time series decomposition by
                 $100 \times$",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1417--1425",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523219",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523219",
  abstract =     "Decomposing a complex time series into trend,
                 seasonality, and remainder components is an important
                 primitive that facilitates time series anomaly
                 detection, change point detection, and forecasting.
                 Although numerous batch algorithms are known for time
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2022:SSS,
  author =       "Haoyu Li and Qizhi Chen and Yixin Zhang and Tong Yang
                 and Bin Cui",
  title =        "{Stingy sketch}: a sketch framework for accurate and
                 fast frequency estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1426--1438",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523220",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523220",
  abstract =     "Recording the frequency of items in highly skewed data
                 streams is a fundamental and hot problem in recent
                 years. The literature demonstrates that sketch is the
                 most promising solution. The typical metrics to measure
                 a sketch are accuracy and speed, but \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:SDP,
  author =       "Yang Wang and Miao Yu and Yujie Hui and Fang Zhou and
                 Yuyang Huang and Rui Zhu and Xueyuan Ren and Tianxi Li
                 and Xiaoyi Lu",
  title =        "A study of database performance sensitivity to
                 experiment settings",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1439--1452",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523221",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523221",
  abstract =     "To allow performance comparison across different
                 systems, our community has developed multiple
                 benchmarks, such as TPC-C and YCSB, which are widely
                 used. However, despite such effort, interpreting and
                 comparing performance numbers is still a challenging
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chao:2022:ITC,
  author =       "Zemin Chao and Hong Gao and Yinan An and Jianzhong
                 Li",
  title =        "The inherent time complexity and an efficient
                 algorithm for subsequence matching problem",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1453--1465",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523222",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523222",
  abstract =     "Subsequence matching is an important and fundamental
                 problem on time series data. This paper studies the
                 inherent time complexity of the subsequence matching
                 problem and designs a more efficient algorithm for
                 solving the problem. Firstly, it is proved \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chai:2022:SDA,
  author =       "Chengliang Chai and Jiabin Liu and Nan Tang and
                 Guoliang Li and Yuyu Luo",
  title =        "Selective data acquisition in the wild for model
                 charging",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1466--1478",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523223",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523223",
  abstract =     "The lack of sufficient labeled data is a key
                 bottleneck for practitioners in many real-world
                 supervised machine learning (ML) tasks. In this paper,
                 we study a new problem, namely selective data
                 acquisition in the wild for model charging: given a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2022:DAR,
  author =       "Wenfei Fan and Wenzhi Fu and Ruochun Jin and Ping Lu
                 and Chao Tian",
  title =        "Discovering association rules from big graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1479--1492",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523224",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523224",
  abstract =     "This paper tackles two challenges to discovery of
                 graph rules. Existing discovery methods often (a)
                 return an excessive number of rules, and (b) do not
                 scale with large graphs given the intractability of the
                 discovery problem. We propose an application
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Han:2022:DEE,
  author =       "Xiaolin Han and Reynold Cheng and Chenhao Ma and
                 Tobias Grubenmann",
  title =        "{DeepTEA}: effective and efficient online
                 time-dependent trajectory outlier detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1493--1505",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523225",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523225",
  abstract =     "In this paper, we study anomalous trajectory
                 detection, which aims to extract abnormal movements of
                 vehicles on the roads. This important problem, which
                 facilitates understanding of traffic behavior and
                 detection of taxi fraud, is challenging due to the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Simonini:2022:ERD,
  author =       "Giovanni Simonini and Luca Zecchini and Sonia
                 Bergamaschi and Felix Naumann",
  title =        "Entity resolution on-demand",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "7",
  pages =        "1506--1518",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3523210.3523226",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:18 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3523210.3523226",
  abstract =     "Entity Resolution (ER) aims to identify and merge
                 records that refer to the same real-world entity. ER is
                 typically employed as an expensive cleaning step on the
                 entire data before consuming it. Yet, determining which
                 entities are useful once cleaned \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Alhazmi:2022:FBC,
  author =       "Afnan Alhazmi and Tom Blount and George
                 Konstantinidis",
  title =        "{ForBackBench}: a benchmark for chasing vs.
                 query-rewriting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1519--1532",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529338",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529338",
  abstract =     "The problems of Data Integration/Exchange (DE) and
                 Ontology Based Data Access (OBDA) have been extensively
                 studied across different communities. The underlying
                 problem is common: using a number of differently
                 structured data-sources mapped to a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:ASB,
  author =       "Jeremy Chen and Yuqing Huang and Mushi Wang and Semih
                 Salihoglu and Ken Salem",
  title =        "Accurate summary-based cardinality estimation through
                 the lens of cardinality estimation graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1533--1545",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529339",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529339",
  abstract =     "This paper is an experimental and analytical study of
                 two classes of summary-based cardinality estimators
                 that use statistics about input relations and
                 small-size joins in the context of graph database
                 management systems: (i) optimistic estimators that
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liao:2022:DDC,
  author =       "Xuankun Liao and Qing Liu and Jiaxin Jiang and Xin
                 Huang and Jianliang Xu and Byron Choi",
  title =        "Distributed {D-core} decomposition over large directed
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1546--1558",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529340",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529340",
  abstract =     "Given a directed graph $G$ and integers $k$ and $l$, a
                 D-core is the maximal subgraph $H \subseteq G$ such
                 that for every vertex of $H$, its in-degree and
                 out-degree are no smaller than $k$ and $l$,
                 respectively. For a directed graph $G$, the problem of
                 D-core decomposition \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:EMB,
  author =       "Lu Chen and Chengfei Liu and Rui Zhou and Jiajie Xu
                 and Jianxin Li",
  title =        "Efficient maximal biclique enumeration for large
                 sparse bipartite graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1559--1571",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529341",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529341",
  abstract =     "Maximal bicliques are effective to reveal meaningful
                 information hidden in bipartite graphs. Maximal
                 biclique enumeration (MBE) is challenging since the
                 number of the maximal bicliques grows exponentially
                 w.r.t. the number of vertices in a bipartite \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2022:TGF,
  author =       "Hongkuan Zhou and Da Zheng and Israt Nisa and
                 Vasileios Ioannidis and Xiang Song and George Karypis",
  title =        "{TGL}: a general framework for temporal {GNN} training
                 on billion-scale graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1572--1580",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529342",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529342",
  abstract =     "Many real world graphs contain time domain
                 information. Temporal Graph Neural Networks capture
                 temporal information as well as structural and
                 contextual information in the generated dynamic node
                 embeddings. Researchers have shown that these
                 embeddings \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yuan:2022:DLF,
  author =       "Binhang Yuan and Cameron R. Wolfe and Chen Dun and
                 Yuxin Tang and Anastasios Kyrillidis and Chris
                 Jermaine",
  title =        "Distributed learning of fully connected neural
                 networks using independent subnet training",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1581--1590",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529343",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529343",
  abstract =     "Distributed machine learning (ML) can bring more
                 computational resources to bear than single-machine
                 learning, thus enabling reductions in training time.
                 Distributed learning partitions models and data over
                 many machines, allowing model and dataset \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Burckhardt:2022:NEE,
  author =       "Sebastian Burckhardt and Badrish Chandramouli and
                 Chris Gillum and David Justo and Konstantinos Kallas
                 and Connor McMahon and Christopher S. Meiklejohn and
                 Xiangfeng Zhu",
  title =        "{Netherite}: efficient execution of serverless
                 workflows",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1591--1604",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529344",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529344",
  abstract =     "Serverless is a popular choice for cloud service
                 architects because it can provide scalability and
                 load-based billing with minimal developer effort.
                 Functions-as-a-service (FaaS) are originally stateless,
                 but emerging frameworks add stateful \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huynh:2022:ERT,
  author =       "Andy Huynh and Harshal A. Chaudhari and Evimaria Terzi
                 and Manos Athanassoulis",
  title =        "{Endure}: a robust tuning paradigm for {LSM} trees
                 under workload uncertainty",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1605--1618",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529345",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529345",
  abstract =     "Log-Structured Merge trees (LSM trees) are
                 increasingly used as the storage engines behind several
                 data systems, frequently deployed in the cloud. Similar
                 to other database architectures, LSM trees consider
                 information about the expected workload (e.g.,
                 \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2022:EDB,
  author =       "Hongzheng Li and Yingxia Shao and Junping Du and Bin
                 Cui and Lei Chen",
  title =        "An {I/O}-efficient disk-based graph system for
                 scalable second-order random walk of large graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1619--1631",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529346",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529346",
  abstract =     "Random walk is widely used in many graph analysis
                 tasks, especially the first-order random walk. However,
                 as a simplification of real-world problems, the
                 first-order random walk is poor at modeling
                  higher-order structures in the data. Recently,
                  second- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vaidya:2022:SLE,
  author =       "Kapil Vaidya and Subarna Chatterjee and Eric Knorr and
                 Michael Mitzenmacher and Stratos Idreos and Tim
                 Kraska",
  title =        "{SNARF}: a learning-enhanced range filter",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1632--1644",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529347",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529347",
  abstract =     "We present Sparse Numerical Array-Based Range Filters
                 (SNARF), a learned range filter that efficiently
                 supports range queries for numerical data. SNARF
                 creates a model of the data distribution to map the
                 keys into a bit array which is stored in a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:DEI,
  author =       "Xin Chen and You Peng and Sibo Wang and Jeffrey Xu
                 Yu",
  title =        "{DLCR}: efficient indexing for label-constrained
                 reachability queries on large dynamic graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1645--1657",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529348",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529348",
  abstract =     "Many real-world graphs, e.g., social networks,
                 biological networks, knowledge graphs, naturally come
                 with edge-labels, with different labels representing
                 different relationships between nodes. On such
                  edge-labeled graphs, an important query is the
                  label- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2022:QTT,
  author =       "Yue Zhao and Gao Cong and Jiachen Shi and Chunyan
                 Miao",
  title =        "{QueryFormer}: a tree transformer model for query plan
                 representation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1658--1670",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529349",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529349",
  abstract =     "Machine learning has become a prominent method in many
                 database optimization problems such as cost estimation,
                 index selection and query optimization. Translating
                 query execution plans into their vectorized
                 representations is non-trivial. Recently, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2022:ICI,
  author =       "Leon Lee and Siphrey Xie and Yunus Ma and Shimin
                 Chen",
  title =        "Index checkpoints for instant recovery in in-memory
                 database systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1671--1683",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529350",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529350",
  abstract =     "We observe that the time bottleneck during the
                 recovery phase of an IMDB (In-Memory DataBase system)
                 shifts from log replaying to index rebuilding after the
                 state-of-art techniques for instant recovery have been
                 applied. In this paper, we investigate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Esmailoghli:2022:MMA,
  author =       "Mahdi Esmailoghli and Jorge-Arnulfo Quian{\'e}-Ruiz
                 and Ziawasch Abedjan",
  title =        "{MATE}: multi-attribute table extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1684--1696",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529353",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529353",
  abstract =     "A core operation in data discovery is to find joinable
                 tables for a given table. Real-world tables include
                 both unary and n-ary join keys. However, existing table
                 discovery systems are optimized for unary joins and are
                 ineffective and slow in the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Paparrizos:2022:TUE,
  author =       "John Paparrizos and Yuhao Kang and Paul Boniol and
                 Ruey S. Tsay and Themis Palpanas and Michael J.
                 Franklin",
  title =        "{TSB-UAD}: an end-to-end benchmark suite for
                 univariate time-series anomaly detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1697--1711",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529354",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529354",
  abstract =     "The detection of anomalies in time series has gained
                 ample academic and industrial attention. However, no
                 comprehensive benchmark exists to evaluate time-series
                 anomaly detection methods. It is common to use (i)
                 proprietary or synthetic data, often \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Leone:2022:CRE,
  author =       "Manuel Leone and Stefano Huber and Akhil Arora and
                 Alberto Garc{\'\i}a-Dur{\'a}n and Robert West",
  title =        "A critical re-evaluation of neural methods for entity
                 alignment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1712--1725",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529355",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529355",
  abstract =     "Neural methods have become the de-facto choice for the
                 vast majority of data analysis tasks, and entity
                 alignment (EA) is no exception. Not surprisingly, more
                 than 50 different neural EA methods have been published
                 since 2017. However, surprisingly, an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Paganelli:2022:AHB,
  author =       "Matteo Paganelli and Francesco {Del Buono} and Andrea
                 Baraldi and Francesco Guerra",
  title =        "Analyzing how {BERT} performs entity matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "8",
  pages =        "1726--1738",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3529337.3529356",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 24 09:22:19 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3529337.3529356",
  abstract =     "State-of-the-art Entity Matching (EM) approaches rely
                 on transformer architectures, such as BERT, for
                  generating highly contextualized embeddings of terms.
                 The embeddings are then used to predict whether pairs
                 of entity descriptions refer to the same \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Arun:2022:SBF,
  author =       "Balaji Arun and Binoy Ravindran",
  title =        "Scalable {Byzantine} fault tolerance via partial
                 decentralization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1739--1752",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538599",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538599",
  abstract =     "Byzantine consensus is a critical component in many
                 permissioned Blockchains and distributed ledgers. We
                 propose a new paradigm for designing BFT protocols
                 called DQBFT that addresses three major performance and
                 scalability challenges that plague past \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2022:EEB,
  author =       "Huan Li and Lanjing Yi and Bo Tang and Hua Lu and
                 Christian S. Jensen",
  title =        "Efficient and error-bounded spatiotemporal quantile
                 monitoring in edge computing environments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1753--1765",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538600",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538600",
  abstract =     "Underlying many types of data analytics, a
                 spatiotemporal quantile monitoring (SQM) query
                 continuously returns the quantiles of a dataset
                 observed in a spatiotemporal range. In this paper, we
                 study SQM in an Internet of Things (IoT) based edge
                 computing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kato:2022:HDP,
  author =       "Fumiyuki Kato and Tsubasa Takahashi and Shun Takagi
                 and Yang Cao and Seng Pei Liew and Masatoshi
                 Yoshikawa",
  title =        "{HDPView}: differentially private materialized view
                 for exploring high dimensional relational data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1766--1778",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538601",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538601",
  abstract =     "How can we explore the unknown properties of
                 high-dimensional sensitive relational data while
                 preserving privacy? We study how to construct an
                 explorable privacy-preserving materialized view under
                 differential privacy. No existing state-of-the-art
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schmidl:2022:ADT,
  author =       "Sebastian Schmidl and Phillip Wenig and Thorsten
                 Papenbrock",
  title =        "Anomaly detection in time series: a comprehensive
                 evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1779--1797",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538602",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538602",
  abstract =     "Detecting anomalous subsequences in time series data
                 is an important task in areas ranging from
                 manufacturing processes over finance applications to
                 health care monitoring. An anomaly can indicate
                 important events, such as production faults, delivery
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Youngmann:2022:GED,
  author =       "Brit Youngmann and Sihem Amer-Yahia and Aurelien
                 Personnaz",
  title =        "Guided exploration of data summaries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1798--1807",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538603",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538603",
  abstract =     "Data summarization is the process of producing
                 interpretable and representative subsets of an input
                 dataset. It is usually performed following a one-shot
                 process with the purpose of finding the best summary. A
                 useful summary contains k individually \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:FDT,
  author =       "Xinyi Zhang and Zhuo Chang and Yang Li and Hong Wu and
                 Jian Tan and Feifei Li and Bin Cui",
  title =        "Facilitating database tuning with hyper-parameter
                 optimization: a comprehensive experimental evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1808--1821",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538604",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538604",
  abstract =     "Recently, using automatic configuration tuning to
                 improve the performance of modern database management
                 systems (DBMSs) has attracted increasing interest from
                 the database community. This is embodied with a number
                 of systems featuring advanced tuning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:ESV,
  author =       "Zuan Wang and Xiaofeng Ding and Hai Jin and Pan Zhou",
  title =        "Efficient secure and verifiable location-based skyline
                 queries over encrypted data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1822--1834",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538605",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538605",
  abstract =     "Supporting secure location-based services on encrypted
                 data that is outsourced to cloud computing platforms
                 remains an ongoing challenge for efficiency due to
                 expensive ciphertext calculation overhead. Furthermore,
                 since the clouds may not be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2022:TIC,
  author =       "Zhuoyue Zhao and Dong Xie and Feifei Li",
  title =        "{AB-tree}: index for concurrent random sampling and
                 updates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1835--1847",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538606",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538606",
  abstract =     "There has been an increasing demand for real-time data
                 analytics. Approximate Query Processing (AQP) is a
                 popular option for that because it can use random
                 sampling to trade some accuracy for lower query
                 latency. However, the state-of-the-art AQP system
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fang:2022:RTR,
  author =       "Chenguang Fang and Shaoxu Song and Yinan Mei",
  title =        "On repairing timestamps for regular interval time
                 series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1848--1860",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538607",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538607",
  abstract =     "Time series data are often with regular time
                 intervals, e.g., in IoT scenarios sensor data collected
                 with a pre-specified frequency, air quality data
                 regularly recorded by outdoor monitors, and GPS signals
                 periodically received from multiple satellites.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2022:TEP,
  author =       "Wenfei Fan and Ruochun Jin and Ping Lu and Chao Tian
                 and Ruiqi Xu",
  title =        "Towards event prediction in temporal graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1861--1874",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538608",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538608",
  abstract =     "This paper proposes a class of temporal association
                 rules, denoted by TACOs, for event prediction. As
                 opposed to previous graph rules, TACOs monitor updates
                 to graphs, and can be used to capture temporal
                 interests in recommendation and catch frauds in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liang:2022:DCH,
  author =       "Yihuai Liang and Yan Li and Byeong-Seok Shin",
  title =        "Decentralized crowdsourcing for human intelligence
                 tasks with efficient on-chain cost",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1875--1888",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538609",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538609",
  abstract =     "Crowdsourcing for Human Intelligence Tasks (HIT) has
                 been widely used to crowdsource human knowledge, such
                 as image annotation for machine learning. We use a
                 public blockchain to play the role of traditional
                 centralized HIT systems, such that the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:TDB,
  author =       "Yue Wang and Ruiqi Xu and Xun Jian and Alexander Zhou
                 and Lei Chen",
  title =        "Towards distributed bitruss decomposition on bipartite
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1889--1901",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538610",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538610",
  abstract =     "Mining cohesive subgraphs on bipartite graphs is an
                  important task. The k-bitruss is one of many popular
                 cohesive subgraph models, which is the maximal subgraph
                 where each edge is contained in at least k butterflies.
                 The bitruss decomposition problem is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gagliardelli:2022:GSM,
  author =       "Luca Gagliardelli and George Papadakis and Giovanni
                 Simonini and Sonia Bergamaschi and Themis Palpanas",
  title =        "Generalized supervised meta-blocking",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1902--1910",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538611",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538611",
  abstract =     "Entity Resolution is a core data integration task that
                 relies on Blocking to scale to large datasets.
                 Schema-agnostic blocking achieves very high recall,
                 requires no domain knowledge and applies to data of any
                 structuredness and schema heterogeneity. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{An:2022:YRO,
  author =       "Mijin An and Soojun Im and Dawoon Jung and Sang-Won
                 Lee",
  title =        "Your read is our priority in flash storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1911--1923",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538612",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538612",
  abstract =     "When replacing a dirty victim page upon page miss, the
                 conventional buffer managers flush the dirty victim
                 first to the storage before reading the missing page.
                 This read-after-write (RAW) protocol, unfortunately,
                 causes the read stall problem on flash \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bhattacharya:2022:NWO,
  author =       "Arindam Bhattacharya and Chathur Gudesa and Amitabha
                 Bagchi and Srikanta Bedathur",
  title =        "New wine in an old bottle: data-aware hash functions
                 for {Bloom} filters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1924--1936",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538613",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538613",
  abstract =     "In many applications of Bloom filters, it is possible
                 to exploit the patterns present in the inserted and
                 non-inserted keys to achieve more compression than the
                 standard Bloom filter. A new class of Bloom filters
                 called Learned Bloom filters use machine \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Peng:2022:SEA,
  author =       "Jingshu Peng and Zhao Chen and Yingxia Shao and Yanyan
                 Shen and Lei Chen and Jiannong Cao",
  title =        "{Sancus}: staleness-aware communication-avoiding
                 full-graph decentralized training in large-scale graph
                 neural networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1937--1950",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538614",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538614",
  abstract =     "Graph neural networks (GNNs) have emerged due to their
                 success at modeling graph data. Yet, it is challenging
                 for GNNs to efficiently scale to large graphs. Thus,
                 distributed GNNs come into play. To avoid communication
                 caused by expensive data movement \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bucchi:2022:CCE,
  author =       "Marco Bucchi and Alejandro Grez and Andr{\'e}s
                 Quintana and Cristian Riveros and Stijn Vansummeren",
  title =        "{CORE}: a complex event recognition engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1951--1964",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538615",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538615",
  abstract =     "Complex Event Recognition (CER) systems are a
                 prominent technology for finding user-defined query
                 patterns over large data streams in real time. CER
                 query evaluation is known to be computationally
                 challenging, since it requires maintaining a set of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cheng:2022:TEE,
  author =       "Audrey Cheng and Xiao Shi and Aaron Kabcenell and
                 Shilpa Lawande and Hamza Qadeer and Jason Chan and
                 Harrison Tin and Ryan Zhao and Peter Bailis and Mahesh
                 Balakrishnan and Nathan Bronson and Natacha Crooks and
                 Ion Stoica",
  title =        "{TAOBench}: an end-to-end benchmark for social network
                 workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "9",
  pages =        "1965--1977",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3538598.3538616",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Jul 28 06:16:23 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3538598.3538616",
  abstract =     "The continued emergence of large social network
                 applications has introduced a scale of data and query
                 volume that challenges the limits of existing data
                 stores. However, few benchmarks accurately simulate
                 these request patterns, leaving researchers in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kakaraparthy:2022:VHA,
  author =       "Aarati Kakaraparthy and Jignesh M. Patel and Brian P.
                 Kroth and Kwanghyun Park",
  title =        "{VIP} hashing: adapting to skew in popularity of data
                 on the fly",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "1978--1990",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547306",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547306",
  abstract =     "All data is not equally popular. Often, some portion
                 of data is more frequently accessed than the rest,
                 which causes a skew in popularity of the data items.
                 Adapting to this skew can improve performance, and this
                 topic has been studied extensively in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vincon:2022:NDP,
  author =       "Tobias Vin{\c{c}}on and Christian Kn{\"o}dler and
                 Leonardo Solis-Vasquez and Arthur Bernhardt and Sajjad
                 Tamimi and Lukas Weber and Florian Stock and Andreas
                 Koch and Ilia Petrov",
  title =        "Near-data processing in database systems on native
                 computational storage under {HTAP} workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "1991--2004",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547307",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547307",
  abstract =     "Today's Hybrid Transactional and Analytical Processing
                  (HTAP) systems tackle the ever-growing data in
                 combination with a mixture of transactional and
                 analytical workloads. While optimizing for aspects such
                 as data freshness and performance isolation, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Echihabi:2022:HAD,
  author =       "Karima Echihabi and Panagiota Fatourou and Kostas
                 Zoumpatianos and Themis Palpanas and Houda Benbrahim",
  title =        "{Hercules} against data series similarity search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2005--2018",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547308",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547308",
  abstract =     "We propose Hercules, a parallel tree-based technique
                 for exact similarity search on massive disk-based data
                 series collections. We present novel index construction
                 and query answering algorithms that leverage different
                 summarization techniques, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Siddiqui:2022:DLO,
  author =       "Tarique Siddiqui and Wentao Wu and Vivek Narasayya and
                 Surajit Chaudhuri",
  title =        "{DISTILL}: low-overhead data-driven techniques for
                 filtering and costing indexes for scalable index
                 tuning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2019--2031",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547309",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547309",
  abstract =     "Many database systems offer index tuning tools that
                 help automatically select appropriate indexes for
                 improving the performance of an input workload. Index
                 tuning is a resource-intensive and time-consuming task
                 requiring expensive optimizer calls for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2022:OML,
  author =       "Zhihui Yang and Zuozhi Wang and Yicong Huang and Yao
                 Lu and Chen Li and X. Sean Wang",
  title =        "Optimizing machine learning inference queries with
                 correlative proxy models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2032--2044",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547310",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547310",
  abstract =     "We consider accelerating machine learning (ML)
                 inference queries on unstructured datasets. Expensive
                 operators such as feature extractors and classifiers
                 are deployed as user-defined functions (UDFs), which
                 are not penetrable with classic query \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Su:2022:BSD,
  author =       "Li Su and Xiaoming Qin and Zichao Zhang and Rui Yang
                 and Le Xu and Indranil Gupta and Wenyuan Yu and Kai
                 Zeng and Jingren Zhou",
  title =        "{Banyan}: a scoped dataflow engine for graph query
                 service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2045--2057",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547311",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547311",
  abstract =     "Graph query services (GQS) are widely used today to
                 interactively answer graph traversal queries on
                 large-scale graph data. Existing graph query engines
                 focus largely on optimizing the latency of a single
                 query. This ignores significant challenges posed
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2022:FEU,
  author =       "Ziyue Huang and Yuan Qiu and Ke Yi and Graham
                 Cormode",
  title =        "Frequency estimation under multiparty differential
                 privacy: one-shot and streaming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2058--2070",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547312",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547312",
  abstract =     "We study the fundamental problem of frequency
                 estimation under both privacy and communication
                 constraints, where the data is distributed among k
                 parties. We consider two application scenarios: (1)
                 one-shot, where the data is static and the aggregator
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ali:2022:OIS,
  author =       "Ahsan Ali and Riccardo Pinciroli and Feng Yan and
                 Evgenia Smirni",
  title =        "Optimizing inference serving on serverless platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2071--2084",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547313",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547313",
  abstract =     "Serverless computing is gaining popularity for machine
                 learning (ML) serving workload due to its autonomous
                 resource scaling, easy to use and pay-per-use cost
                 model. Existing serverless platforms work well for
                 image-based ML inference, where requests \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Alkowaileet:2022:CFS,
  author =       "Wail Y. Alkowaileet and Michael J. Carey",
  title =        "Columnar formats for schemaless {LSM}-based document
                 stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2085--2097",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547314",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547314",
  abstract =     "In the last decade, document store database systems
                 have gained more traction for storing and querying
                 large volumes of semi-structured data. However, the
                 flexibility of the document stores' data models has
                 limited their ability to store data in a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Qiu:2022:ESP,
  author =       "Yu-Xuan Qiu and Dong Wen and Lu Qin and Wentao Li and
                 Rong-Hua Li and Ying Zhang",
  title =        "Efficient shortest path counting on large road
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2098--2110",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547315",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547315",
  abstract =     "The shortest path distance and related concepts lay
                 the foundations of many real-world applications in road
                 network analysis. The shortest path count has drawn
                 much research attention in academia, not only as a
                  closeness metric accompanying the shortest \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fu:2022:TCE,
  author =       "Fangcheng Fu and Xupeng Miao and Jiawei Jiang and
                 Huanran Xue and Bin Cui",
  title =        "Towards communication-efficient vertical federated
                 learning training via cache-enabled local updates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2111--2120",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547316",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547316",
  abstract =     "Vertical federated learning (VFL) is an emerging
                 paradigm that allows different parties (e.g.,
                 organizations or enterprises) to collaboratively build
                 machine learning models with privacy protection. In the
                 training phase, VFL only exchanges the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2022:DED,
  author =       "Yifan Zhu and Lu Chen and Yunjun Gao and Baihua Zheng
                 and Pengfei Wang",
  title =        "{DESIRE}: an efficient dynamic cluster-based forest
                 indexing for similarity search in multi-metric spaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2121--2133",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547317",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547317",
  abstract =     "Similarity search finds similar objects for a given
                 query object based on a certain similarity metric.
                 Similarity search in metric spaces has attracted
                 increasing attention, as the metric space can
                 accommodate any type of data and support flexible
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kim:2022:AAB,
  author =       "Junghoon Kim and Kaiyu Feng and Gao Cong and Diwen Zhu
                 and Wenyuan Yu and Chunyan Miao",
  title =        "{ABC}: attributed bipartite co-clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2134--2147",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547318",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547318",
  abstract =     "Finding a set of co-clusters in a bipartite network is
                 a fundamental and important problem. In this paper, we
                 present the Attributed Bipartite Co-clustering (ABC)
                 problem which unifies two main concepts: (i) bipartite
                 modularity optimization, and (ii) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xiao:2022:TSD,
  author =       "Jinzhao Xiao and Yuxiang Huang and Changyu Hu and
                 Shaoxu Song and Xiangdong Huang and Jianmin Wang",
  title =        "Time series data encoding for efficient storage: a
                 comparative analysis in {Apache IoTDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2148--2160",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547319",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547319",
  abstract =     "Not only the vast applications but also the distinct
                 features of time series data stimulate the booming
                 growth of time series database management systems, such
                 as Apache IoTDB, InfluxDB, OpenTSDB and so on. Almost
                 all these systems employ columnar \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:SLO,
  author =       "Teng Zhang and Jian Tan and Xin Cai and Jianying Wang
                 and Feifei Li and Jianling Sun",
  title =        "{SA-LSM}: optimize data layout for {LSM}-tree based
                 storage using survival analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2161--2174",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547320",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547320",
  abstract =     "A significant fraction of data in cloud storage is
                 rarely accessed, referred to as cold data. Accurately
                 identifying and efficiently managing cold data on
                 cost-effective storages is one of the major challenges
                 for cloud providers, which balances between \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ferragina:2022:IMV,
  author =       "Paolo Ferragina and Giovanni Manzini and Travis Gagie
                 and Dominik K{\"o}ppl and Gonzalo Navarro and Manuel
                 Striani and Francesco Tosoni",
  title =        "Improving matrix-vector multiplication via lossless
                 grammar-compressed matrices",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2175--2187",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547321",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547321",
  abstract =     "As nowadays Machine Learning (ML) techniques are
                 generating huge data collections, the problem of how to
                 efficiently engineer their storage and operations is
                 becoming of paramount importance. In this article we
                 propose a new lossless compression scheme \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2022:NRL,
  author =       "Shangyu Wu and Yufei Cui and Jinghuan Yu and Xuan Sun
                 and Tei-Wei Kuo and Chun Jason Xue",
  title =        "{NFL}: robust learned index via distribution
                 transformation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2188--2200",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547322",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547322",
  abstract =     "Recent works on learned index open a new direction for
                 the indexing field. The key insight of the learned
                 index is to approximate the mapping between keys and
                 positions with piece-wise linear functions. Such
                 methods require partitioning key space for a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zare:2022:LLG,
  author =       "Hamidreza Zare and Viveck Ramesh Cadambe and Bhuvan
                 Urgaonkar and Nader Alfares and Praneet Soni and Chetan
                 Sharma and Arif A. Merchant",
  title =        "{LEGOStore}: a linearizable geo-distributed store
                 combining replication and erasure coding",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2201--2215",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547323",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547323",
  abstract =     "We design and implement LEGOStore, an erasure coding
                 (EC) based linearizable data store over geo-distributed
                 public cloud data centers (DCs). For such a data store,
                 the confluence of the following factors opens up
                  opportunities for EC to be latency \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Simpson:2022:MMU,
  author =       "Michael Simpson and Farnoosh Hashemi and Laks V. S.
                 Lakshmanan",
  title =        "Misinformation mitigation under differential
                 propagation rates and temporal penalties",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2216--2229",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547324",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547324",
  abstract =     "We propose an information propagation model that
                 captures important temporal aspects that have been well
                 observed in the dynamics of fake news diffusion, in
                 contrast with the diffusion of truth. The model
                 accounts for differential propagation rates of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2022:SDL,
  author =       "Lixi Zhou and Jiaqing Chen and Amitabh Das and Hong
                 Min and Lei Yu and Ming Zhao and Jia Zou",
  title =        "Serving deep learning models with deduplication from
                 relational databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2230--2243",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547325",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547325",
  abstract =     "Serving deep learning models from relational databases
                 brings significant benefits. First, features extracted
                 from databases do not need to be transferred to any
                 decoupled deep learning systems for inferences, and
                 thus the system management overhead can \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2022:DOI,
  author =       "Zichun Huang and Shimin Chen",
  title =        "Density-optimized intersection-free mapping and matrix
                 multiplication for join-project operations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2244--2256",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547326",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547326",
  abstract =     "A Join-Project operation is a join operation followed
                 by a duplicate eliminating projection operation. It is
                 used in a large variety of applications, including
                 entity matching, set analytics, and graph analytics.
                 Previous work proposes a hybrid design \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jahangiri:2022:DTO,
  author =       "Shiva Jahangiri and Michael J. Carey and
                 Johann-Christoph Freytag",
  title =        "Design trade-offs for a robust dynamic hybrid hash
                 join",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2257--2269",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547327",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547327",
  abstract =     "Hybrid Hash Join (HHJ) has proven to be one of the
                 most efficient and widely-used join algorithms. While
                 HHJ's performance depends largely on accurate
                 statistics and information about the input relations,
                 it may not always be practical or possible for a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Foufoulas:2022:YYE,
  author =       "Yannis Foufoulas and Alkis Simitsis and Lefteris
                 Stamatogiannakis and Yannis Ioannidis",
  title =        "{YeSQL}: ``you extend {SQL}'' with rich and highly
                 performant user-defined functions in relational
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2270--2283",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547328",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547328",
  abstract =     "The diversity and complexity of modern data management
                 applications have led to the extension of the
                 relational paradigm with syntactic and semantic support
                 for User-Defined Functions (UDFs). Although
                 well-established in traditional DBMS settings, UDFs
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ahmetaj:2022:MSS,
  author =       "Shqiponja Ahmetaj and Bianca L{\"o}hnert and Magdalena
                 Ortiz and Mantas Simkus",
  title =        "Magic shapes for {SHACL} validation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "10",
  pages =        "2284--2296",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3547305.3547329",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Sep 8 11:58:53 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3547305.3547329",
  abstract =     "A key prerequisite for the successful adoption of the
                 Shapes Constraint Language (SHACL)---the W3C
                 standardized constraint language for RDF graphs---is
                 the availability of automated tools that efficiently
                 validate targeted constraints (known as shapes
                 \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Merchant:2022:SGR,
  author =       "Arpit Merchant and Aristides Gionis and Michael
                 Mathioudakis",
  title =        "Succinct graph representations as distance oracles: an
                 experimental evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2297--2306",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551794",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551794",
  abstract =     "Distance oracles answer shortest-path queries between
                 any pair of nodes in a graph. They are often built
                 using succinct graph representations such as spanners,
                 sketches, and compressors to minimize oracle size and
                 query answering latency. Node \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jiang:2022:ECS,
  author =       "Yangqin Jiang and Yixiang Fang and Chenhao Ma and Xin
                 Cao and Chunshan Li",
  title =        "Effective community search over large star-schema
                 heterogeneous information networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2307--2320",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551795",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551795",
  abstract =     "Community search (CS) enables personalized community
                 discovery and has found a wide spectrum of emerging
                 applications such as setting up social events and
                 friend recommendation. While CS has been extensively
                 studied for conventional homogeneous networks,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ting:2022:NDT,
  author =       "Kai Ming Ting and Zongyou Liu and Hang Zhang and Ye
                 Zhu",
  title =        "A new distributional treatment for time series and an
                 anomaly detection investigation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2321--2333",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551796",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551796",
  abstract =     "Time series is traditionally treated with two main
                 approaches, i.e., the time domain approach and the
                 frequency domain approach. These approaches must rely
                 on a sliding window so that time-shift versions of a
                 periodic subsequence can be measured to be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Denham:2022:WUL,
  author =       "Benjamin Denham and Edmund M-K. Lai and Roopak Sinha
                 and M. Asif Naeem",
  title =        "{Witan}: unsupervised labelling function generation
                 for assisted data programming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2334--2347",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551797",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551797",
  abstract =     "Effective supervised training of modern machine
                 learning models often requires large labelled training
                 datasets, which could be prohibitively costly to
                 acquire for many practical applications. Research
                 addressing this problem has sought ways to leverage
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bao:2022:SMM,
  author =       "Ergute Bao and Yizheng Zhu and Xiaokui Xiao and Yin
                 Yang and Beng Chin Ooi and Benjamin Hong Meng Tan and
                 Khin Mi Mi Aung",
  title =        "{Skellam} mixture mechanism: a novel approach to
                 federated learning with differential privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2348--2360",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551798",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551798",
  abstract =     "Deep neural networks have strong capabilities of
                 memorizing the underlying training data, which can be a
                 serious privacy concern. An effective solution to this
                 problem is to train models with differential privacy
                 (DP), which provides rigorous privacy \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hilprecht:2022:ZSC,
  author =       "Benjamin Hilprecht and Carsten Binnig",
  title =        "Zero-shot cost models for out-of-the-box learned cost
                 prediction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2361--2374",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551799",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551799",
  abstract =     "In this paper, we introduce zero-shot cost models,
                 which enable learned cost estimation that generalizes
                 to unseen databases. In contrast to state-of-the-art
                 workload-driven approaches, which require to execute a
                 large set of training queries on every \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Choi:2022:WMG,
  author =       "Dalsu Choi and Hyunsik Yoon and Hyubjin Lee and Yon
                 Dohn Chung",
  title =        "{Waffle}: in-memory grid index for moving objects with
                 reinforcement learning-based configuration tuning
                 system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2375--2388",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551800",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551800",
  abstract =     "Location-based services for moving objects are close
                 to our lives. For example, ride-sharing services,
                 micro-mobility services, navigation and traffic
                 management, delivery services, and autonomous driving
                 are all based on moving objects. The efficient
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jungmair:2022:DOF,
  author =       "Michael Jungmair and Andr{\'e} Kohn and Jana Giceva",
  title =        "Designing an open framework for query optimization and
                 compilation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2389--2401",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551801",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551801",
  abstract =     "Since its invention, data-centric code generation has
                 been adopted for query compilation by various database
                 systems in academia and industry. These database
                 systems are fast but maximize performance at the
                 expense of developer friendliness, flexibility,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Nguyen:2022:PST,
  author =       "Lam-Duy Nguyen and Sang-Won Lee and Beomseok Nam",
  title =        "In-page shadowing and two-version timestamp ordering
                 for mobile {DBMSs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2402--2414",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551802",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551802",
  abstract =     "Increasing the concurrency level in mobile database
                 systems has not received much attention, mainly because
                 the concurrency requirements of mobile workloads has
                 been regarded to be low. Contrary to popular belief,
                 mobile workloads require higher \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2022:REA,
  author =       "Shixuan Sun and Xibo Sun and Bingsheng He and Qiong
                 Luo",
  title =        "{RapidFlow}: an efficient approach to continuous
                 subgraph matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2415--2427",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551803",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551803",
  abstract =     "Continuous subgraph matching (CSM) is an important
                 building block in many real-time graph processing
                 applications. Given a subgraph query Q and a data graph
                 stream, a CSM algorithm reports the occurrences of Q in
                 the stream. Specifically, when a new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Helali:2022:SAA,
  author =       "Mossad Helali and Essam Mansour and Ibrahim Abdelaziz
                 and Julian Dolby and Kavitha Srinivas",
  title =        "A scalable {AutoML} approach based on graph neural
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2428--2436",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551804",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551804",
  abstract =     "AutoML systems build machine learning models
                 automatically by performing a search over valid data
                 transformations and learners, along with
                 hyper-parameter optimization for each learner. Many
                 AutoML systems use meta-learning to guide search for
                 optimal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pappachan:2022:DTT,
  author =       "Primal Pappachan and Shufan Zhang and Xi He and Sharad
                 Mehrotra",
  title =        "Don't be a tattle-tale: preventing leakages through
                 data dependencies on access control protected data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2437--2449",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551805",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551805",
  abstract =     "We study the problem of answering queries when (part
                 of) the data may be sensitive and should not be leaked
                 to the querier. Simply restricting the computation to
                 non-sensitive part of the data may leak sensitive data
                 through inference based on data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xu:2022:ELB,
  author =       "Qingyu Xu and Feng Zhang and Zhiming Yao and Lv Lu and
                 Xiaoyong Du and Dong Deng and Bingsheng He",
  title =        "Efficient load-balanced butterfly counting on {GPU}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2450--2462",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551806",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551806",
  abstract =     "Butterfly counting is an important and costly
                 operation for large bipartite graphs. GPUs are popular
                 parallel heterogeneous devices and can bring
                 significant performance improvement for data science
                 applications. Unfortunately, no work enables efficient
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Benson:2022:PBB,
  author =       "Lawrence Benson and Leon Papke and Tilmann Rabl",
  title =        "{PerMA}-bench: benchmarking persistent memory access",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2463--2476",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551807",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551807",
  abstract =     "Persistent memory's (PMem) byte-addressability and
                 persistence at DRAM-like speed with SSD-like capacity
                 have the potential to cause a major performance shift
                 in database storage systems. With the availability of
                 Intel Optane DC Persistent Memory, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{He:2022:EPM,
  author =       "Yuliang He and Duo Lu and Kaisong Huang and Tianzheng
                 Wang",
  title =        "Evaluating persistent memory range indexes: part two",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2477--2490",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551808",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551808",
  abstract =     "Scalable persistent memory (PM) has opened up new
                 opportunities for building indexes that operate and
                 persist data directly on the memory bus, potentially
                 enabling instant recovery, low latency and high
                 throughput. When real PM hardware (Intel Optane
                 \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yogatama:2022:ODP,
  author =       "Bobbi W. Yogatama and Weiwei Gong and Xiangyao Yu",
  title =        "Orchestrating data placement and query execution in
                 heterogeneous {CPU-GPU DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2491--2503",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551809",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551809",
  abstract =     "There has been a growing interest in using GPU to
                 accelerate data analytics due to its massive
                 parallelism and high memory bandwidth. The main
                 constraint of using GPU for data analytics is the
                 limited capacity of GPU memory. Heterogeneous CPU-GPU
                 query \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:IMO,
  author =       "Weicheng Wang and Raymond Chi-Wing Wong",
  title =        "Interactive mining with ordered and unordered
                 attributes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2504--2516",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551810",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551810",
  abstract =     "There are various queries proposed to assist users in
                 finding their favorite tuples from a dataset with the
                 help of user interaction. Specifically, they interact
                 with a user by asking questions. Each question presents
                 two tuples, which are selected from \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2022:FDS,
  author =       "Wenzhe Yang and Sheng Wang and Yuan Sun and Zhiyong
                 Peng",
  title =        "Fast dataset search with earth mover's distance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2517--2529",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551811",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551811",
  abstract =     "The amount of spatial data in open data portals has
                 increased rapidly, raising the demand for spatial
                 dataset search in large data repositories. In this
                 paper, we tackle spatial dataset search by using the
                 Earth Mover's Distance (EMD) to measure the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pereira:2022:AST,
  author =       "Jo{\~a}o L. M. Pereira and Jo{\~a}o Casanova and
                 Helena Galhardas and Dennis Shasha",
  title =        "{AcX}: system, techniques, and experiments for acronym
                 expansion",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2530--2544",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551812",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551812",
  abstract =     "In this information-accumulating world, each of us
                 must learn continuously. To participate in a new field,
                 or even a sub-field, one must be aware of the
                 terminology including the acronyms that specialists
                 know so well, but newcomers do not. Building on
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:GTH,
  author =       "Hongzhi Chen and Changji Li and Chenguang Zheng and
                 Chenghuan Huang and Juncheng Fang and James Cheng and
                 Jian Zhang",
  title =        "{G-tran}: a high performance distributed graph
                 database with a decentralized architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2545--2558",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551813",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551813",
  abstract =     "Graph transaction processing poses unique challenges
                 such as random data access due to the irregularity of
                 graph structures, low throughput and high abort rate
                 due to the relatively large read/write sets in graph
                 transactions. To address these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Konig:2022:TPS,
  author =       "Arnd Christian K{\"o}nig and Yi Shan and Tobias
                 Ziegler and Aarati Kakaraparthy and Willis Lang and
                 Justin Moeller and Ajay Kalhan and Vivek Narasayya",
  title =        "Tenant placement in over-subscribed
                 database-as-a-service clusters",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2559--2571",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551814",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551814",
  abstract =     "Relational cloud Database-as-a-Service offerings run
                 on multi-tenant infrastructure consisting of clusters
                 of nodes, with each node hosting multiple tenant
                 databases. Such clusters may be over-subscribed to
                 increase resource utilization and improve \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:EBS,
  author =       "Yue Chen and Kaiyu Feng and Gao Cong and Han Mao
                 Kiah",
  title =        "Example-based spatial pattern matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2572--2584",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551815",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551815",
  abstract =     "The prevalence of GPS-enabled mobile devices and
                 location-based services yield massive volume of spatial
                 objects where each object contains information
                 including geographical location, name, address,
                 category and other attributes. This paper introduces
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Peng:2022:NFP,
  author =       "Zeshun Peng and Yanfeng Zhang and Qian Xu and Haixu
                 Liu and Yuxiao Gao and Xiaohua Li and Ge Yu",
  title =        "{NeuChain}: a fast permissioned blockchain system with
                 deterministic ordering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2585--2598",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551816",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551816",
  abstract =     "Blockchain serves as a replicated transactional
                 processing system in a trustless distributed
                 environment. Existing blockchain systems all rely on an
                 explicit ordering step to determine the global order of
                 transactions that are collected from multiple
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{McKenna:2022:AAI,
  author =       "Ryan McKenna and Brett Mullins and Daniel Sheldon and
                 Gerome Miklau",
  title =        "{AIM}: an adaptive and iterative mechanism for
                 differentially private synthetic data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2599--2612",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551817",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551817",
  abstract =     "We propose AIM, a new algorithm for differentially
                 private synthetic data generation. AIM is a
                 workload-adaptive algorithm within the paradigm of
                 algorithms that first selects a set of queries, then
                 privately measures those queries, and finally
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Toussaint:2022:TNV,
  author =       "Etienne Toussaint and Paolo Guagliardo and Leonid
                 Libkin and Juan Sequeda",
  title =        "Troubles with nulls, views from the users",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2613--2625",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551818",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551818",
  abstract =     "Incomplete data, in the form of null values, has been
                 extensively studied since the inception of the
                 relational model in the 1970s. Anecdotally, one hears
                 that the way in which SQL, the standard language for
                 relational databases, handles nulls creates a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Park:2022:GSE,
  author =       "Yeonhong Park and Sunhong Min and Jae W. Lee",
  title =        "{Ginex}: {SSD}-enabled billion-scale graph neural
                 network training on a single machine via provably
                 optimal in-memory caching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2626--2639",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551819",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551819",
  abstract =     "Graph Neural Networks (GNNs) are receiving a spotlight
                 as a powerful tool that can effectively serve various
                 inference tasks on graph structured data. As the size
                 of real-world graphs continues to scale, the GNN
                 training system faces a scalability \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:SPQ,
  author =       "Junhua Zhang and Wentao Li and Long Yuan and Lu Qin
                 and Ying Zhang and Lijun Chang",
  title =        "Shortest-path queries on complex networks:
                 experiments, analyses, and improvement",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2640--2652",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551820",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551820",
  abstract =     "The shortest-path query, which returns the shortest
                 path between two vertices, is a basic operation on
                 complex networks and has numerous applications. To
                 handle shortest-path queries, one option is to use
                 traversal-based methods (e.g., breadth-first \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ghayyur:2022:MAA,
  author =       "Sameera Ghayyur and Dhrubajyoti Ghosh and Xi He and
                 Sharad Mehrotra",
  title =        "{MIDE}: accuracy aware minimally invasive data
                 exploration for decision support",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2653--2665",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551821",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551821",
  abstract =     "This paper studies privacy in the context of
                 decision-support queries that classify objects as
                 either true or false based on whether they satisfy the
                 query. Mechanisms to ensure privacy may result in false
                  positives and false negatives. In decision- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ghosh:2022:JJT,
  author =       "Dhrubajyoti Ghosh and Peeyush Gupta and Sharad
                 Mehrotra and Roberto Yus and Yasser Altowim",
  title =        "{JENNER}: just-in-time enrichment in query
                 processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2666--2678",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551822",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551822",
  abstract =     "Emerging domains, such as sensor-driven smart spaces
                 and social media analytics, require incoming data to be
                 enriched prior to its use. Enrichment often consists of
                 machine learning (ML) functions that are too
                 expensive/infeasible to execute at \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:CCA,
  author =       "Jiaoyi Zhang and Yihan Gao",
  title =        "{CARMI}: a cache-aware learned index with a cost-based
                 construction algorithm",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "11",
  pages =        "2679--2691",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3551793.3551823",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Oct 29 08:52:37 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3551793.3551823",
  abstract =     "Learned indexes, which use machine learning models to
                 replace traditional index structures, have shown
                 promising results in recent studies. However, existing
                 learned indexes exhibit a performance gap between
                 synthetic and real-world datasets, making \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chiosa:2022:HAC,
  author =       "Monica Chiosa and Fabio Maschi and Ingo M{\"u}ller and
                 Gustavo Alonso and Norman May",
  title =        "Hardware acceleration of compression and encryption in
                 {SAP HANA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3277--3291",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554822",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554822",
  abstract =     "With the advent of cloud computing, where
                 computational resources are expensive and data movement
                 needs to be secured and minimized, database management
                 systems need to reconsider their architecture to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Graf:2022:FPB,
  author =       "Martin Graf and Lukas Laskowski and Florian Papsdorf
                 and Florian Sold and Roland Gremmelspacher and Felix
                 Naumann and Fabian Panse",
  title =        "{Frost}: a platform for benchmarking and exploring
                 data matching results",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3292--3305",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554823",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554823",
  abstract =     "``Bad'' data has a direct impact on 88\% of companies,
                 with the average company losing 12\% of its revenue due
                 to it. Duplicates --- multiple but different
                 representations of the same real-world entities --- are
                 among the main \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2022:BHP,
  author =       "Changji Li and Hongzhi Chen and Shuai Zhang and
                 Yingqian Hu and Chao Chen and Zhenjie Zhang and Meng Li
                 and Xiangchen Li and Dongqing Han and Xiaohui Chen and
                 Xudong Wang and Huiming Zhu and Xuwei Fu and Tingwei Wu
                 and Hongfei Tan and Hengtian Ding and Mengjin Liu and
                 Kangcheng Wang and Ting Ye and Lei Li and Xin Li and Yu
                 Wang and Chenguang Zheng and Hao Yang and James Cheng",
  title =        "{ByteGraph}: a high-performance distributed graph
                 database in {ByteDance}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3306--3318",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554824",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554824",
  abstract =     "Most products at ByteDance, e.g., TikTok, Douyin, and
                 Toutiao, naturally generate massive amounts of graph
                 data. To efficiently store, query and update massive
                 graph data is challenging for the broad range of
                 products at \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Das:2022:CEC,
  author =       "Prakash Das and Shivangi Srivastava and Valentin
                 Moskovich and Anmol Chaturvedi and Anant Mittal and
                 Yongqin Xiao and Mosharaf Chowdhury",
  title =        "{CDI-E}: an elastic cloud service for data
                 engineering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3319--3331",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554825",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554825",
  abstract =     "We live in the gilded age of data-driven computing.
                 With public clouds offering virtually unlimited amounts
                 of compute and storage, enterprises collecting data
                 about every aspect of their businesses, and advances in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:OED,
  author =       "Sheng Wang and Yiran Li and Huorong Li and Feifei Li
                 and Chengjin Tian and Le Su and Yanshan Zhang and
                 Yubing Ma and Lie Yan and Yuanyuan Sun and Xuntao Cheng
                 and Xiaolong Xie and Yu Zou",
  title =        "{Operon}: an encrypted database for
                 ownership-preserving data management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3332--3345",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554826",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554826",
  abstract =     "The past decade has witnessed the rapid development of
                 cloud computing and data-centric applications. While
                 these innovations offer numerous attractive features
                 for data processing, they also bring in new issues
                 about \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gong:2022:TPF,
  author =       "Caixin Gong and Chengjin Tian and Zhengheng Wang and
                 Sheng Wang and Xiyu Wang and Qiulei Fu and Wu Qin and
                 Long Qian and Rui Chen and Jiang Qi and Ruo Wang and
                 Guoyun Zhu and Chenghu Yang and Wei Zhang and Feifei
                 Li",
  title =        "{Tair-PMem}: a fully durable non-volatile memory
                 database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3346--3358",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554827",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554827",
  abstract =     "In-memory databases (IMDBs) have been the backbone of
                 modern systems that demand high throughput and low
                 latency. Because of the cost and volatility of DRAM,
                 IMDBs become incompetent when dealing with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lambov:2022:TMC,
  author =       "Branimir Lambov",
  title =        "Trie memtables in {Cassandra}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3359--3371",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554828",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554828",
  abstract =     "This paper discusses a new memtable implementation for
                 Apache Cassandra which is based on tries (also called
                 prefix trees) and byte-comparable representations of
                 database keys. The implementation is already in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pedreira:2022:VMU,
  author =       "Pedro Pedreira and Orri Erling and Masha Basmanova and
                 Kevin Wilfong and Laith Sakka and Krishna Pai and Wei
                 He and Biswapesh Chattopadhyay",
  title =        "{Velox}: {Meta}'s unified execution engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3372--3384",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554829",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554829",
  abstract =     "The ad-hoc development of new specialized computation
                 engines targeted to very specific data workloads has
                 created a siloed data landscape. Commonly, these
                 engines share little to nothing with each other and are
                 hard to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2022:OMT,
  author =       "Zhenkun Yang and Chuanhui Yang and Fusheng Han and
                 Mingqiang Zhuang and Bing Yang and Zhifeng Yang and
                 Xiaojun Cheng and Yuzhong Zhao and Wenhui Shi and
                 Huafeng Xi and Huang Yu and Bin Liu and Yi Pan and
                 Boxue Yin and Junquan Chen and Quanqing Xu",
  title =        "{OceanBase}: a 707 million {tpmC} distributed
                 relational database system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3385--3397",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554830",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554830",
  abstract =     "We have designed and developed OceanBase, a
                 distributed relational database system from the very
                 basics for a decade. Being a scale-out multi-tenant
                 system, OceanBase is cross-region fault tolerant, which
                 is based on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lan:2022:VVR,
  author =       "Hai Lan and Jiong Xie and Zhifeng Bao and Feifei Li
                 and Wei Tian and Fang Wang and Sheng Wang and Ailin
                 Zhang",
  title =        "{VRE}: a versatile, robust, and economical trajectory
                 data system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3398--3410",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554831",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554831",
  abstract =     "Managing massive trajectory data from various moving
                 objects has always been a demanding task. A desired
                 trajectory data system should be versatile in its
                 supported query types and distance functions, of low
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:BBH,
  author =       "Jianjun Chen and Yonghua Ding and Ye Liu and Fangshi
                 Li and Li Zhang and Mingyi Zhang and Kui Wei and Lixun
                 Cao and Dan Zou and Yang Liu and Lei Zhang and Rui Shi
                 and Wei Ding and Kai Wu and Shangyu Luo and Jason Sun
                 and Yuming Liang",
  title =        "{ByteHTAP}: {ByteDance}'s {HTAP} system with high data
                 freshness and strong data consistency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3411--3424",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554832",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554832",
  abstract =     "In recent years, at ByteDance, we see more and more
                 business scenarios that require performing complex
                 analysis over freshly imported data, together with
                 transaction support and strong data consistency. In
                 this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wingerath:2022:BCW,
  author =       "Wolfram Wingerath and Benjamin Wollmer and Markus
                 Bestehorn and Stephan Succo and Sophie Ferrlein and
                 Florian B{\"u}cklers and J{\"o}rn Domnik and Fabian
                 Panse and Erik Witt and Anil Sener and Felix Gessert
                 and Norbert Ritter",
  title =        "{Beaconnect}: continuous web performance {A\slash B}
                 testing at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3425--3431",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554833",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554833",
  abstract =     "Content delivery networks (CDNs) are critical for
                 minimizing access latency in the Web as they
                 efficiently distribute online resources across the
                 globe. But since CDNs can only be enabled on the scope
                 of entire \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:COC,
  author =       "Zongzhi Chen and Xinjun Yang and Feifei Li and Xuntao
                 Cheng and Qingda Hu and Zheyu Miao and Rongbiao Xie and
                 Xiaofei Wu and Kang Wang and Zhao Song and Haiqing Sun
                 and Zechao Zhuang and Yuming Yang and Jie Xu and Liang
                 Yin and Wenchao Zhou and Sheng Wang",
  title =        "{CloudJump}: optimizing cloud databases for cloud
                 storages",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3432--3444",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554834",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554834",
  abstract =     "There has been an increasing interest in building
                 cloud-native databases that decouple computation and
                 storage for elasticity. A cloud-native database often
                 adopts a cloud storage underneath its storage engine,
                 leveraging \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zheng:2022:DMN,
  author =       "Kaiping Zheng and Shaofeng Cai and Horng Ruey Chua and
                 Melanie Herschel and Meihui Zhang and Beng Chin Ooi",
  title =        "{DyHealth}: making neural networks dynamic for
                 effective healthcare analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3445--3458",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554835",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554835",
  abstract =     "In National University Hospital (NUH) in Singapore, we
                 conduct healthcare analytics that analyzes
                 heterogeneous electronic medical records (EMR) to
                 support effective clinical decision-making on a daily
                 basis. Existing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Blueprint: a declarative, constraint-based DSL for document
%%% extraction (PVLDB 15(12), 2022).
@Article{Mishchenko:2022:BCS,
  author =       "Andrey Mishchenko and Dominique Danco and Abhilash
                 Jindal and Adrian Blue",
  title =        "{Blueprint}: a constraint-solving approach for
                 document extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3459--3471",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554836",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554836",
  abstract =     "Blueprint is a declarative domain-specific language
                 for document extraction. Users describe document layout
                 using spatial, textual, semantic, and numerical fuzzy
                 constraints, and the language runtime extracts the
                 field-value \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% TencentCLS: Tencent's cloud log service, built for high query
%%% performance.
@Article{Yu:2022:TCL,
  author =       "Muzhi Yu and Zhaoxiang Lin and Jinan Sun and Runyun
                 Zhou and Guoqiang Jiang and Hua Huang and Shikun
                 Zhang",
  title =        "{TencentCLS}: the cloud log service with high query
                 performances",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3472--3482",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554837",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554837",
  abstract =     "With the trend of cloud computing, the cloud log
                 service is becoming increasingly important, as it plays
                 a critical role in tasks such as root cause analysis,
                 service monitoring and security audition. To meet these
                 needs, we \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Ganos: a cloud-native spatial database engine for
%%% multidimensional, dynamic, scene-oriented data.
@Article{Xie:2022:GMD,
  author =       "Jiong Xie and Zhen Chen and Jianwei Liu and Fang Wang
                 and Feifei Li and Zhida Chen and Yinpei Liu and Songlu
                 Cai and Zhenhua Fan and Fei Xiao and Yue Chen",
  title =        "{Ganos}: a multidimensional, dynamic, and
                 scene-oriented cloud-native spatial database engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3483--3495",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554838",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554838",
  abstract =     "Recently, the trend of developing digital twins for
                 smart cities has driven a need for managing large-scale
                 multidimensional, dynamic, and scene-oriented spatial
                 data. Due to larger data scale and more complex
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Magma: the write-optimized, high-data-density storage engine of the
%%% Couchbase NoSQL document database.  ``Couchbase'' is a proper noun
%%% (see the abstract) and is braced in the title so that
%%% sentence-casing bibliography styles preserve its capitalization,
%%% consistent with the brace protection of {Blueprint}, {TencentCLS},
%%% and {Ganos} in the neighboring entries.
@Article{Lakshman:2022:MHD,
  author =       "Sarath Lakshman and Apaar Gupta and Rohan Suri and
                 Scott Lashley and John Liang and Srinath Duvuru and
                 Ravi Mayuram",
  title =        "{Magma}: a high data density storage engine used in
                 {Couchbase}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3496--3508",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554839",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554839",
  abstract =     "We present Magma, a write-optimized high data density
                 key-value storage engine used in the Couchbase NoSQL
                 distributed document database. Today's write-heavy
                 data-intensive applications like ad-serving, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Doppler: automated SKU recommendation for migrating SQL workloads
%%% to the cloud.
@Article{Cahoon:2022:DAS,
  author =       "Joyce Cahoon and Wenjing Wang and Yiwen Zhu and
                 Katherine Lin and Sean Liu and Raymond Truong and Neetu
                 Singh and Chengcheng Wan and Alexandra Ciortea and
                 Sreraman Narasimhan and Subru Krishnan",
  title =        "{Doppler}: automated {SKU} recommendation in migrating
                 {SQL} workloads to the cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3509--3521",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554840",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554840",
  abstract =     "Selecting the optimal cloud target to migrate SQL
                 estates from on-premises to the cloud remains a
                 challenge. Current solutions are not only
                 time-consuming and error-prone, requiring significant
                 user input, but also fail to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Meta's next-generation realtime monitoring and analytics platform
%%% over structured logs.
@Article{Harizopoulos:2022:MNG,
  author =       "Stavros Harizopoulos and Taylor Hopper and Morton Mo
                 and Shyam Sundar Chandrasekaran and Tongguang Chen and
                 Yan Cui and Nandini Ganesh and Gary Helmling and Hieu
                 Pham and Sebastian Wong",
  title =        "{Meta}'s next-generation realtime monitoring and
                 analytics platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3522--3534",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554841",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554841",
  abstract =     "Unlike traditional database systems where data and
                 system availability are tied together, there is a wide
                 class of systems targeting realtime monitoring and
                 analytics over structured logs where these properties
                 can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Retrospective on SQLite: past, present, and future.
@Article{Gaffney:2022:SPP,
  author =       "Kevin P. Gaffney and Martin Prammer and Larry
                 Brasfield and D. Richard Hipp and Dan Kennedy and
                 Jignesh M. Patel",
  title =        "{SQLite}: past, present, and future",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3535--3547",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554842",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554842",
  abstract =     "In the two decades following its initial release,
                 SQLite has become the most widely deployed database
                 engine in existence. Today, SQLite is found in nearly
                 every smartphone, computer, web browser, television,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Manu: a cloud-native vector database management system.
@Article{Guo:2022:MCN,
  author =       "Rentong Guo and Xiaofan Luan and Long Xiang and Xiao
                 Yan and Xiaomeng Yi and Jigao Luo and Qianya Cheng and
                 Weizhi Xu and Jiarui Luo and Frank Liu and Zhenshan Cao
                 and Yanliang Qiao and Ting Wang and Bo Tang and Charles
                 Xie",
  title =        "{Manu}: a cloud native vector database management
                 system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3548--3561",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554843",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554843",
  abstract =     "With the development of learning-based embedding
                 models, embedding vectors are widely used for analyzing
                 and searching unstructured data. As vector collections
                 exceed billion-scale, fully managed and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Automated relational data explanation using external semantic
%%% knowledge.
@Article{Galhotra:2022:ARD,
  author =       "Sainyam Galhotra and Udayan Khurana",
  title =        "Automated relational data explanation using external
                 semantic knowledge",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3562--3565",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554844",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554844",
  abstract =     "In data science problems, understanding the data is a
                 crucial first step. However, it can be challenging and
                 time intensive for a data scientist who is not an
                 expert in that domain. Several downstream tasks such as
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Kelpie: an explainability framework for embedding-based link
%%% prediction models.
@Article{Rossi:2022:KEF,
  author =       "Andrea Rossi and Donatella Firmani and Paolo Merialdo
                 and Tommaso Teofili",
  title =        "{Kelpie}: an explainability framework for
                 embedding-based link prediction models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3566--3569",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554845",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554845",
  abstract =     "The latest generations of Link Prediction (LP) models
                 rely on embeddings to tackle incompleteness in
                 Knowledge Graphs, achieving great performance at the
                 cost of interpretability. Their opaqueness limits the
                 trust \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% OREO: detecting cherry-picked generalizations in data analytics.
@Article{Lin:2022:ODC,
  author =       "Yin Lin and Brit Youngmann and Yuval Moskovitch and H.
                 V. Jagadish and Tova Milo",
  title =        "{OREO}: detection of cherry-picked generalizations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3570--3573",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554846",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554846",
  abstract =     "Data analytics often make sense of large data sets by
                 generalization: aggregating from the detailed data to a
                 more general context. Given a dataset, misleading
                 generalizations can sometimes be drawn from a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% DuckDB-Wasm: a WebAssembly version of DuckDB for in-browser
%%% analytical SQL processing.
@Article{Kohn:2022:DWF,
  author =       "Andr{\'e} Kohn and Dominik Moritz and Mark Raasveldt
                 and Hannes M{\"u}hleisen and Thomas Neumann",
  title =        "{DuckDB-wasm}: fast analytical processing for the
                 web",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3574--3577",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554847",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554847",
  abstract =     "We introduce DuckDB-Wasm, a WebAssembly version of the
                 database system DuckDB, to provide fast analytical
                 processing for the Web. DuckDB-Wasm evaluates SQL
                 queries asynchronously in web workers, supports
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% EasyDR: human-in-the-loop error detection and repair for holistic
%%% table cleaning.
@Article{Xi:2022:EHL,
  author =       "Yihai Xi and Ning Wang and Xinyu Chen and Yiyi Zhang
                 and Zilong Wang and Zhihong Xu and Yue Wang",
  title =        "{EasyDR}: a human-in-the-loop error detection \&
                 repair platform for holistic table cleaning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3578--3581",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554848",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554848",
  abstract =     "Many tables on the web suffer from multi-level and
                 multi-type quality problems, but existing cleaning
                 systems cannot provide a comprehensive quality
                 improvement for them. Most of these systems are
                 designed for solving a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Hu-fu: a data federation system for secure spatial queries.
@Article{Pan:2022:HFD,
  author =       "Xuchen Pan and Yongxin Tong and Chunbo Xue and Zimu
                 Zhou and Junping Du and Yuxiang Zeng and Yexuan Shi and
                 Xiaofei Zhang and Lei Chen and Yi Xu and Ke Xu and
                 Weifeng Lv",
  title =        "{Hu-fu}: a data federation system for secure spatial
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3582--3585",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554849",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554849",
  abstract =     "The increasing concerns on data security limit the
                 sharing of data distributedly stored at multiple data
                 owners and impede the scale of spatial queries over big
                 urban data. In response, data federation systems have
                 emerged \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% CAT: synthesizing data-aware conversational agents for
%%% transactional (OLTP) databases.
@Article{Gassen:2022:DCS,
  author =       "Marius Gassen and Benjamin H{\"a}ttasch and Benjamin
                 Hilprecht and Nadja Geisler and Alexander Fraser and
                 Carsten Binnig",
  title =        "Demonstrating {CAT}: synthesizing data-aware
                 conversational agents for transactional databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3586--3589",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554850",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554850",
  abstract =     "Databases for OLTP are often the backbone for
                 applications such as hotel room or cinema ticket
                 booking applications. However, developing a
                 conversational agent (i.e., a chatbot-like interface)
                 to allow end-users to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% EDA4SUM: guided multi-step exploration of data summaries.
@Article{Personnaz:2022:EGE,
  author =       "Aur{\'e}lien Personnaz and Brit Youngmann and Sihem
                 Amer-Yahia",
  title =        "{EDA4SUM}: guided exploration of data summaries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3590--3593",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554851",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554851",
  abstract =     "We demonstrate EDA4Sum, a framework dedicated to
                 generating guided multi-step data summarization
                 pipelines for very large datasets. Data summarization
                 is the process of producing interpretable and
                 representative \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% CaJaDE: explaining query results by augmenting provenance with
%%% context from related tables.
@Article{Li:2022:CEQ,
  author =       "Chenjie Li and Juseung Lee and Zhengjie Miao and Boris
                 Glavic and Sudeepa Roy",
  title =        "{CaJaDE}: explaining query results by augmenting
                 provenance with context",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3594--3597",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554852",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554852",
  abstract =     "In this work, we demonstrate CaJaDE (Context-Aware
                 Join-Augmented Deep Explanations), a system that
                 explains query results by augmenting provenance with
                 contextual information from other related tables in the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Tensor Query Processor (TQP): compiling relational operators into
%%% tensor programs on runtimes such as PyTorch.
@Article{Asada:2022:STT,
  author =       "Yuki Asada and Victor Fu and Apurva Gandhi and Advitya
                 Gemawat and Lihao Zhang and Dong He and Vivek Gupta and
                 Ehi Nosakhare and Dalitso Banda and Rathijit Sen and
                 Matteo Interlandi",
  title =        "Share the tensor tea: how databases can leverage the
                 machine learning ecosystem",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3598--3601",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554853",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554853",
  abstract =     "We demonstrate Tensor Query Processor (TQP): a query
                 processor that automatically compiles relational
                 operators into tensor programs. By leveraging tensor
                 runtimes such as PyTorch, TQP is able to: (1) integrate
                 with ML \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% MOCHA: visualizing the impact of operator choices in query
%%% execution plans, for database education.  The last author's middle
%%% initial carries a period (``Sourav S. Bhowmick''), matching the
%%% spelling used in bibliographic records of his other publications.
@Article{Tan:2022:MTV,
  author =       "Jess Tan and Desmond Yeo and Rachael Neoh and Huey-Eng
                 Chua and Sourav S. Bhowmick",
  title =        "{MOCHA}: a tool for visualizing impact of operator
                 choices in query execution plans for database
                 education",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3602--3605",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554854",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554854",
  abstract =     "The database systems course is offered in many major
                 universities. A key learning goal of learners taking
                 such a course is to understand how sql queries are
                 processed in an RDBMS in practice. To this end,
                 comprehension of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% LIBKDV: a kernel density visualization library for geospatial
%%% analytics.  (Note: ``Leong Hou U'' is in First Last order; the
%%% surname is the single letter ``U''.)
@Article{Chan:2022:LVK,
  author =       "Tsz Nam Chan and Pak Lon Ip and Kaiyan Zhao and Leong
                 Hou U and Byron Choi and Jianliang Xu",
  title =        "{LIBKDV}: a versatile kernel density visualization
                 library for geospatial analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3606--3609",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554855",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554855",
  abstract =     "Kernel density visualization (KDV) has been widely
                 used in many geospatial analysis tasks, including
                 traffic accident hotspot detection, crime hotspot
                 detection, and disease outbreak detection. Although KDV
                 can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Demonstration of multi-region CockroachDB.
@Article{Ajmani:2022:DMR,
  author =       "Arul Ajmani and Aayush Shah and Alexander Shraer and
                 Adam Storm and Rebecca Taft and Oliver Tan and Nathan
                 VanBenschoten",
  title =        "A demonstration of multi-region {CockroachDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3610--3613",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554856",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554856",
  abstract =     "A database service is required to meet the
                 consistency, performance, and availability goals of
                 modern applications serving a global user-base.
                 Configuring a database deployed across multiple regions
                 such that it fulfills these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% DPDS: assisting data science with data provenance.
@Article{Chapman:2022:DAD,
  author =       "Adriane Chapman and Luca Lauro and Paolo Missier and
                 Riccardo Torlone",
  title =        "{DPDS}: assisting data science with data provenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3614--3617",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554857",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554857",
  abstract =     "Successful data-driven science requires a complex
                 combination of data engineering pipelines and data
                 modelling techniques. Robust and defensible results can
                 only be achieved when each step in the pipeline
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% POEM: pattern-oriented explanations of CNN models.
@Article{Dadvar:2022:PPO,
  author =       "Vargha Dadvar and Lukasz Golab and Divesh Srivastava",
  title =        "{POEM}: pattern-oriented explanations of {CNN}
                 models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3618--3621",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554858",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554858",
  abstract =     "Deep learning models achieve state-of-the-art
                 performance in many applications, but their prediction
                 decisions are difficult to explain. Various solutions
                 exist in the area of explainable AI, for example to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% WebArrayDB: a geospatial array DBMS running in the web browser.
@Article{Zalipynis:2022:WGA,
  author =       "Ramon Antonio Rodriges Zalipynis and Nikita Terlych",
  title =        "{WebArrayDB}: a geospatial array {DBMS} in your web
                 browser",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3622--3625",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554859",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554859",
  abstract =     "Geospatial array DBMSs operate on georeferenced N -d
                 arrays. They provide storage engines, query parsers,
                 and processing capabilities as their core
                 functionality. Traditionally, those have been too heavy
                 for a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% AutoDI: towards automatic query-plan regression analysis.
@Article{Lan:2022:ATA,
  author =       "Hai Lan and Yuanjia Zhang and Zhifeng Bao and Yu Dong
                 and Dongxu Huang and Liu Tang and Jian Zhang",
  title =        "{AutoDI}: towards an automatic plan regression
                 analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3626--3629",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554860",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554860",
  abstract =     "Manual analysis on plan regression is both
                 labor-intensive and inefficient for a large query plan
                 and numerous queries. In this paper, we demonstrate
                 AutoDI, an automatic detection and inference tool that
                 has been \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PHOcus: efficiently archiving photos.
@Article{Davidson:2022:PEA,
  author =       "Susan B. Davidson and Shay Gershtein and Tova Milo and
                 Slava Novgorodov and May Shoshan",
  title =        "{PHOcus}: efficiently archiving photos",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3630--3633",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554861",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554861",
  abstract =     "Our ability to collect data is rapidly outstripping
                 our ability to effectively store and use it.
                 Organizations are therefore facing tough decisions of
                 what data to archive (or dispose of) to effectively
                 meet their business goals. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% VINCENT: efficient exploratory subgraph search in graph databases.
@Article{Huang:2022:VTE,
  author =       "Kai Huang and Qingqing Ye and Jing Zhao and Xi Zhao
                 and Haibo Hu and Xiaofang Zhou",
  title =        "{VINCENT}: towards efficient exploratory subgraph
                 search in graph databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3634--3637",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554862",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554862",
  abstract =     "Exploratory search is a search paradigm that plays a
                 vital role in databases, data mining, and information
                 retrieval to assist users to get familiar with the
                 underlying databases. It supports iterative query
                 formulation to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Drien:2022:AAP,
  author =       "Osnat Drien and Matanya Freiman and Yael Amsterdamer",
  title =        "{ActivePDB}: active probabilistic databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3638--3641",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554863",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554863",
  abstract =     "We present a novel framework for uncertain data
                 management, called ActivePDB. We are given a relational
                 probabilistic database, where each tuple is correct
                 with some probability; e.g., a database constructed
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Teofili:2022:CED,
  author =       "Tommaso Teofili and Donatella Firmani and Nick Koudas
                 and Paolo Merialdo and Divesh Srivastava",
  title =        "{CERTEM}: explaining and debugging black-box entity
                 resolution systems with {CERTA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3642--3645",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554864",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554864",
  abstract =     "Entity resolution (ER) aims at identifying record
                 pairs that refer to the same real-world entity. Recent
                 works have focused on deep learning (DL) techniques, to
                 solve this problem. While such works have brought
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Aksoy:2022:SIS,
  author =       "Ahmet Kerem Aksoy and Pavel Dushev and Eleni Tzirita
                 Zacharatou and Holmer Hemsen and Marcela Charfuelan and
                 Jorge-Arnulfo Quian{\'e}-Ruiz and Beg{\"u}m Demir and
                 Volker Markl",
  title =        "Satellite image search in {AgoraEO}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3646--3649",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554865",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554865",
  abstract =     "The growing operational capability of global Earth
                 Observation (EO) creates new opportunities for
                 data-driven approaches to understand and protect our
                 planet. However, the current use of EO archives is very
                 restricted \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yan:2022:SDD,
  author =       "Li Yan and Nerissa Xu and Guozhong Li and Sourav S
                 Bhowmick and Byron Choi and Jianliang Xu",
  title =        "{SENSOR}: data-driven construction of sketch-based
                 visual query interfaces for time series data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3650--3653",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554866",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554866",
  abstract =     "Sketching is a common approach to visually query time
                 series data. However, a recent study reported that
                 sketching a pattern for querying is ``often ineffective
                 on its own'' in practice due to lack of
                 ``representative objects'' to facilitate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bonifati:2022:DPG,
  author =       "Angela Bonifati and Stefania Dumbrava and Emile
                 Martinez and Fatemeh Ghasemi and Malo Jaffr{\'e} and
                 Pac{\^o}me Luton and Thomas Pickles",
  title =        "{DiscoPG}: property graph schema discovery and
                 exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3654--3657",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554867",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554867",
  abstract =     "Property graphs are becoming pervasive in a variety of
                 graph processing applications using interconnected
                 data. They allow to encode multi-labeled nodes and
                 edges, as well as their properties, represented as
                 key/value \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Maamar-Kouadri:2022:SQO,
  author =       "Wissam Maamar-Kouadri and Salima Benbernou and Mourad
                 Ouziri and Themis Palpanas and Iheb {Ben Amor}",
  title =        "{SA-Q}: observing, evaluating, and enhancing the
                 quality of the results of sentiment analysis tools",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3658--3661",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554868",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554868",
  abstract =     "Sentiment analysis has received constant research
                 attention due to its usefulness and importance in
                 different applications. However, despite the research
                 advances in this field, most current tools suffer in
                 prediction \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Orogat:2022:SDA,
  author =       "Abdelghny Orogat and Ahmed El-Roby",
  title =        "{SmartBench}: demonstrating automatic generation of
                 comprehensive benchmarks for question answering over
                 knowledge graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3662--3665",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554869",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554869",
  abstract =     "In recent years, a significant number of question
                 answering (QA) systems that retrieve answers to natural
                 language questions from knowledge graphs (KG) have been
                 introduced. However, finding a benchmark that
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tu:2022:DHE,
  author =       "Jianhong Tu and Xiaoyue Han and Ju Fan and Nan Tang
                 and Chengliang Chai and Guoliang Li and Xiaoyong Du",
  title =        "{DADER}: hands-off entity resolution with domain
                 adaptation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3666--3669",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554870",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554870",
  abstract =     "Entity resolution (ER) is a core data integration
                 problem that identifies pairs of data instances
                 referring to the same real-world entities, and the
                 state-of-the-art results of ER are achieved by deep
                 learning (DL) based \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gale:2022:SWS,
  author =       "James Gale and Max Seiden and Deepanshu Utkarsh and
                 Jason Frantz and Rob Woollen and {\c{C}}a{\u{g}}atay
                 Demiralp",
  title =        "{Sigma Workbook}: a spreadsheet for cloud data
                 warehouses",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3670--3673",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554871",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554871",
  abstract =     "Cloud data warehouses (CDWs) bring large-scale data
                 and compute power closer to users in enterprises.
                 However, existing tools for analyzing data in CDWs are
                 either limited in ad-hoc transformations or difficult
                 to use for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2022:RMC,
  author =       "Zihao Chen and Zhizhen Xu and Baokun Han and Chen Xu
                 and Weining Qian and Aoying Zhou",
  title =        "{ReMac}: a matrix computation system with redundancy
                 elimination",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3674--3677",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554872",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554872",
  abstract =     "Distributed matrix computation solutions support query
                 interfaces of linear algebra expressions, which often
                 contain redundancy, i.e., common and loop-constant
                 subexpressions. However, existing solutions fail
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wenig:2022:TBT,
  author =       "Phillip Wenig and Sebastian Schmidl and Thorsten
                 Papenbrock",
  title =        "{TimeEval}: a benchmarking toolkit for time series
                 anomaly detection algorithms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3678--3681",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554873",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554873",
  abstract =     "Detecting anomalous subsequences in time series is an
                 important task in time series analytics because it
                 serves the identification of special events, such as
                 production faults, delivery bottlenecks, system
                 defects, or heart \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lerner:2022:DAH,
  author =       "Alberto Lerner and Matthias Jasny and Theo Jepsen and
                 Carsten Binnig and Philippe Cudr{\'e}-Mauroux",
  title =        "{DBMS} annihilator: a high-performance database
                 workload generator in action",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3682--3685",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554874",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554874",
  abstract =     "Modern DBMS engines can achieve unprecedented
                 transaction processing speeds thanks to the invention
                 of clever data structures, concurrency schemes, and
                 improvements in CPU and memory subsystems. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liang:2022:FSF,
  author =       "Zhiyu Liang and Hongzhi Wang",
  title =        "{FedTSC}: a secure federated learning system for
                 interpretable time series classification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3686--3689",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554875",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554875",
  abstract =     "We demonstrate FedTSC, a novel federated learning (FL)
                 system for interpretable time series classification
                 (TSC). FedTSC is an FL-based TSC solution that makes a
                 great balance among security, interpretability,
                 accuracy, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2022:AVA,
  author =       "Qingshun Wu and Yafei Li and Huiling Li and Di Zhang
                 and Guanglei Zhu",
  title =        "{AMRAS}: a visual analysis system for spatial
                 crowdsourcing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3690--3693",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554876",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554876",
  abstract =     "The wide adoption of GPS-enabled smart devices has
                 greatly promoted spatial crowdsourcing, where the core
                 issue is how to assign tasks to workers efficiently and
                 with high quality. In this paper, we build a novel
                 visual \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Al-Sayeh:2022:SCA,
  author =       "Hani Al-Sayeh and Muhammad Attahir Jibril and Muhammad
                 Waleed {Bin Saeed} and Kai-Uwe Sattler",
  title =        "{SparkCAD}: caching anomalies detector for {Spark}
                 applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3694--3697",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554877",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554877",
  abstract =     "Developers of Apache Spark applications can accelerate
                 their workloads by caching suitable intermediate
                 results in memory and reusing them rather than
                 recomputing them all over again every time they are
                 needed. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{vLeeuwen:2022:AQP,
  author =       "Wilco v. Leeuwen and Thomas Mulder and Bram van de
                 Wall and George Fletcher and Nikolay Yakovets",
  title =        "{AvantGraph} query processing engine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3698--3701",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554878",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554878",
  abstract =     "We demonstrate AvantGraph, a graph query processing
                 engine developed by the Database group at TU Eindhoven.
                 Designed for efficient processing of both subgraph
                 matching and navigational graph queries, AvantGraph
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Boniol:2022:TNL,
  author =       "Paul Boniol and John Paparrizos and Yuhao Kang and
                 Themis Palpanas and Ruey S. Tsay and Aaron J. Elmore
                 and Michael J. Franklin",
  title =        "{Theseus}: navigating the labyrinth of time-series
                 anomaly detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3702--3705",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554879",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554879",
  abstract =     "The detection of anomalies in time series has gained
                 ample academic and industrial attention, yet, no
                 comprehensive benchmark exists to evaluate time-series
                 anomaly detection methods. Therefore, there is no final
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hofmann:2022:DAS,
  author =       "Dennis Hofmann and Peter VanNostrand and Huayi Zhang
                 and Yizhou Yan and Lei Cao and Samuel Madden and Elke
                 Rundensteiner",
  title =        "A demonstration of {AutoOD}: a self-tuning anomaly
                 detection system",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3706--3709",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554880",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554880",
  abstract =     "Anomaly detection is a critical task in applications
                 like preventing financial fraud, system malfunctions,
                 and cybersecurity attacks. While previous research has
                 offered a plethora of anomaly detection algorithms,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gakhar:2022:POA,
  author =       "Sunny Gakhar and Joyce Cahoon and Wangchao Le and
                 Xiangnan Li and Kaushik Ravichandran and Hiren Patel
                 and Marc Friedman and Brandon Haynes and Shi Qiao and
                 Alekh Jindal and Jyoti Leeka",
  title =        "{Pipemizer}: an optimizer for analytics data
                 pipelines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3710--3713",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554881",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554881",
  abstract =     "We demonstrate Pipemizer, an optimizer and recommender
                 aimed at improving the performance of queries or jobs
                 in pipelines. These job pipelines are ubiquitous in
                 modern data analytics due to jobs reading output files
                 written \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Redyuk:2022:DAA,
  author =       "Sergey Redyuk and Zoi Kaoudi and Sebastian Schelter
                 and Volker Markl",
  title =        "{DORIAN} in action: assisted design of data science
                 pipelines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3714--3717",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554882",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554882",
  abstract =     "Existing automated machine learning solutions and
                 intelligent discovery assistants are popular tools that
                 facilitate the end-user with the design of data science
                 (DS) pipelines. However, they yield limited \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{He:2022:WDN,
  author =       "Yuntian He and Yue Zhang and Saket Gurukar and
                 Srinivasan Parthasarathy",
  title =        "{WebMILE}: democratizing network representation
                 learning at scale",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3718--3721",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554883",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554883",
  abstract =     "In recent years, we have seen the success of network
                 representation learning (NRL) methods in diverse
                 domains ranging from computational chemistry to drug
                 discovery and from social network analysis to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Geisler:2022:DQQ,
  author =       "Nadja Geisler and Benjamin H{\"a}ttasch and Carsten
                 Binnig",
  title =        "Demonstrating {Quest}: a query-driven framework to
                 explain classification models on tabular data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3722--3725",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554884",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554884",
  abstract =     "Machine learning models are everywhere now; but only
                 few of them are transparent in how they work. To remedy
                 this, local explanations aim to show users how and why
                 learned models produce a certain output for a given
                 input \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ripberger:2022:IID,
  author =       "Drew Ripberger and Yifan Gan and Xueyuan Ren and
                 Spyros Blanas and Yang Wang",
  title =        "{IsoBugView}: interactively debugging isolation bugs
                 in database applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3726--3729",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554885",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554885",
  abstract =     "Database applications frequently use weaker isolation
                 levels, such as Read Committed, for better performance,
                 which may lead to bugs that do not happen under
                 Serializable. Although a number of works have
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Foufoulas:2022:YRU,
  author =       "Yannis Foufoulas and Alkis Simitsis and Yannis
                 Ioannidis",
  title =        "{YeSQL}: rich user-defined functions without the
                 overhead",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3730--3733",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554886",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554886",
  abstract =     "The diversity and complexity of modern data management
                 applications led to the extension of the relational
                 paradigm with syntactic and semantic support for
                 User-Defined Functions (UDFs). Although
                 well-established in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2022:DAM,
  author =       "Zhihui Yang and Yicong Huang and Zuozhi Wang and Feng
                 Gao and Yao Lu and Chen Li and X. Sean Wang",
  title =        "Demonstration of accelerating machine learning
                 inference queries with correlative proxy models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3734--3737",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554887",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554887",
  abstract =     "We will demonstrate a prototype query-processing
                 engine, which utilizes correlations among predicates to
                 accelerate machine learning (ML) inference queries on
                 unstructured data. Expensive operators such as feature
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Demonstration paper (per title): the Texera collaborative workflow-based data-analytics system.
@Article{Liu:2022:DCI,
  author =       "Xiaozhen Liu and Zuozhi Wang and Shengquan Ni and
                 Sadeem Alsudais and Yicong Huang and Avinash Kumar and
                 Chen Li",
  title =        "Demonstration of collaborative and interactive
                 workflow-based data analytics in {Texera}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3738--3741",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554888",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554888",
  abstract =     "Collaborative data analytics is becoming increasingly
                 important due to the higher complexity of data science,
                 more diverse skills from different disciplines, more
                 common asynchronous schedules of team members, and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% NOTE(review): "N -d" in the ACM abstract excerpt is an extraction artifact
%%% for the math "$N$-d" (N-dimensional); repaired below (twice).
@Article{Zalipynis:2022:SAR,
  author =       "Ramon Antonio Rodriges Zalipynis",
  title =        "{SimDB} in action: road traffic simulations completely
                 inside array {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3742--3745",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554889",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554889",
  abstract =     "Array DBMSs operate on big $N$-d arrays. Cellular
                 automata (CA) work on a discrete lattice of cells,
                 essentially on $N$-d arrays. CA facilitate decision
                 support as they realistically simulate complex
                 phenomena including road \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Tutorial (per title): transformers for tabular data representation.
@Article{Badaro:2022:TTD,
  author =       "Gilbert Badaro and Paolo Papotti",
  title =        "Transformers for tabular data representation: a
                 tutorial on models and applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3746--3749",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554890",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554890",
  abstract =     "In the last few years, the natural language processing
                 community witnessed advances in neural representations
                 of free texts with transformer-based language models
                 (LMs). Given the importance of knowledge available in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Overview paper (per title): polyglot data management, state of the art and open challenges.
@Article{Kiehn:2022:PDM,
  author =       "Felix Kiehn and Mareike Schmidt and Daniel Glake and
                 Fabian Panse and Wolfram Wingerath and Benjamin Wollmer
                 and Martin Poppinga and Norbert Ritter",
  title =        "Polyglot data management: state of the art \& open
                 challenges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3750--3753",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554891",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554891",
  abstract =     "Due to the increasing variety of the current database
                 landscape, polyglot data management has become a hot
                 research topic in recent years. The underlying idea is
                 to combine the benefits of different data stores
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Tutorial on machine programming (abstract: "We present a tutorial ...").
@Article{Wasay:2022:MPT,
  author =       "Abdul Wasay and Nesime Tatbul and Justin Gottschlich",
  title =        "Machine programming: turning data into programmer
                 productivity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3754--3757",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554892",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554892",
  abstract =     "Machine programming is an emerging research area that
                 improves the software development life cycle from
                 design through deployment. We present a tutorial on
                 machine programming research highlighting aspects
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Overview paper (per title): cloud databases --- new techniques, challenges, and opportunities.
@Article{Li:2022:CDN,
  author =       "Guoliang Li and Haowen Dong and Chao Zhang",
  title =        "Cloud databases: new techniques, challenges, and
                 opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3758--3761",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554893",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554893",
  abstract =     "As database vendors are increasingly moving towards
                 the cloud data service, i.e., databases as a service
                 (DBaaS), cloud databases have become prevalent.
                 Compared with the early cloud-hosted databases, the new
                 generation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% NOTE(review): restored the lost accent in the second author's surname ---
%%% Semih Saliho{\u g}lu (Turkish g-breve); this file renders accents elsewhere
%%% (e.g. Quian{\'e}-Ruiz, {\"O}zcan). Verify spelling against DBLP/ACM DL.
@Article{Mhedhbi:2022:MTQ,
  author =       "Amine Mhedhbi and Semih Saliho{\u g}lu",
  title =        "Modern techniques for querying graph-structured
                 relations: foundations, system implementations, and
                 open challenges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3762--3765",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554894",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554894",
  abstract =     "The last decade has seen an emergence of numerous
                 specialized graph DBMSs (GDBMSs) as well as
                 graph-optimized extensions of RDBMSs. In addition,
                 several query processing techniques, such as worst-case
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Overview paper (per title): densest subgraph discovery (DSD) on large graphs.
@Article{Fang:2022:DSD,
  author =       "Yixiang Fang and Wensheng Luo and Chenhao Ma",
  title =        "Densest subgraph discovery on large graphs:
                 applications, challenges, and techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3766--3769",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554895",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554895",
  abstract =     "As one of the most fundamental problems in graph data
                 mining, the densest subgraph discovery (DSD) problem
                 has found a broad spectrum of real applications, such
                 as social network community detection, graph index
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% NOTE(review): "Codex" is a proper noun (the GPT-3 Codex model); capitalization
%%% was lost ("codex") and unprotected, so sentence-casing styles would keep it
%%% lowercase. Brace-protected together with GPT-3. Verify against the ACM DL record.
@Article{Trummer:2022:BGC,
  author =       "Immanuel Trummer",
  title =        "From {BERT} to {GPT-3 Codex}: harnessing the potential
                 of very large language models for data management",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3770--3773",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554896",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554896",
  abstract =     "Large language models have recently advanced the state
                 of the art on many natural language processing
                 benchmarks. The newest generation of models can be
                 applied to a variety of tasks with little to no
                 specialized \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Overview paper (per title): past, present, and future of indexing on persistent memory (PM).
@Article{Huang:2022:PPF,
  author =       "Kaisong Huang and Yuliang He and Tianzheng Wang",
  title =        "The past, present and future of indexing on persistent
                 memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3774--3777",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554897",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554897",
  abstract =     "Persistent memory (PM) based indexing techniques have
                 been proposed to build fast yet persistent indexes that
                 sit on the memory bus. Over the past decade, numerous
                 techniques have been proposed with various \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Overview paper (per title): unified data analytics, state of the art and open problems.
@Article{Kaoudi:2022:UDA,
  author =       "Zoi Kaoudi and Jorge-Arnulfo Quian{\'e}-Ruiz",
  title =        "Unified data analytics: state-of-the-art and open
                 problems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3778--3781",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554898",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554898",
  abstract =     "There is an urgent need for unifying data analytics as
                 more and more application tasks become more complex:
                 Nowadays, it is normal to see tasks performing data
                 preparation, analytical processing, and machine
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Long paper (pages 3782--3797) on challenges and opportunities of big graphs.
%%% Key suffix "BGC" also used by Trummer:2022:BGC; keys remain unique (different first author).
@Article{Fan:2022:BGC,
  author =       "Wenfei Fan",
  title =        "Big graphs: challenges and opportunities",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3782--3797",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554899",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554899",
  abstract =     "Big data is typically characterized with 4V's: Volume,
                 Velocity, Variety and Veracity. When it comes to big
                 graphs, these challenges become even more staggering.
                 Each and every of the 4V's raises new questions, from
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Paper on AI-powered, data-driven education platforms.
@Article{Amer-Yahia:2022:TAP,
  author =       "Sihem Amer-Yahia",
  title =        "Towards {AI-powered} data-driven education",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3798--3806",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554900",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554900",
  abstract =     "Educational platforms are increasingly becoming
                 AI-driven. Besides providing a wide range of course
                 filtering options, personalized recommendations of
                 learning material and teachers are driving today's
                 research. While \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Retrospective on heterogeneous information networks (HINs); abstract refers
%%% back to the authors' 2011 PathSim paper.
@Article{Sun:2022:HIN,
  author =       "Yizhou Sun and Jiawei Han and Xifeng Yan and Philip S.
                 Yu and Tianyi Wu",
  title =        "Heterogeneous information networks: the past, the
                 present, and the future",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3807--3811",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554901",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554901",
  abstract =     "In 2011, we proposed PathSim to systematically define
                 and compute similarity between nodes in a heterogeneous
                 information network (HIN), where nodes and links are
                 from different types. In the PathSim paper, we for the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Paper on interpretable and actionable data analysis via explanations and causality.
@Article{Roy:2022:TIA,
  author =       "Sudeepa Roy",
  title =        "Toward interpretable and actionable data analysis with
                 explanations and causality",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3812--3820",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554902",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554902",
  abstract =     "We live in a world dominated by data, where users from
                 different fields routinely collect, study, and make
                 decisions supported by data. To aid these users, the
                 current trend in data analysis is to design tools that
                 allow \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% VLDB Women in Database Research award talk (per title).
@Article{Ozcan:2022:RMD,
  author =       "Fatma {\"O}zcan",
  title =        "Reflections on my data management research journey
                 ({VLDB} women in database research award talk)",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3821--3822",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554903",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554903",
  abstract =     "Data-driven decision making is critical for all kinds
                 of enterprises, public and private. It has been my
                 mission to find more efficient, and effective ways to
                 store, manage, query and analyze data to drive
                 actionable \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Panel description (per title): startups founded by database researchers.
@Article{Mohan:2022:PSF,
  author =       "C. Mohan",
  title =        "Panel: startups founded by database researchers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3823--3825",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554904",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554904",
  abstract =     "This in-person panel, which I will be moderating, will
                 focus on startups founded by worldwide database
                 researchers. The panelists are a set of people with
                 different backgrounds in terms of their geographic
                 locations, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Panel description (per abstract): cloud data systems research opportunities.
@Article{Balazinska:2022:CDS,
  author =       "Magdalena Balazinska and Surajit Chaudhuri and AnHai
                 Doan and Joseph M. Hellerstein and Hanuma Kodavalla and
                 Ippokratis Pandis and Matei Zaharia",
  title =        "Cloud data systems: what are the opportunities for the
                 database research community?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "12",
  pages =        "3826--3827",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3554821.3554905",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:11:07 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3554821.3554905",
  abstract =     "The panel will discuss the research opportunities for
                 the database research community in the context of cloud
                 native data services.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Research paper, PVLDB 15(13). Title-Case titles (here onward) match the
%%% Dec-2023 bibdate batch; earlier 15(12) entries use sentence case.
@Article{John:2022:HDD,
  author =       "Sachin Basil John and Christoph Koch",
  title =        "High-Dimensional Data Cubes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3828--3840",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565839",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565839",
  abstract =     "This paper introduces an approach to supporting
                 high-dimensional data cubes at interactive query speeds
                 and moderate storage cost. The approach is based on
                 binary(-domain) data cubes that are judiciously
                 partially materialized; the missing information
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% NOTE(review): "top- k" in the ACM abstract excerpt is an extraction artifact
%%% for the math "top-$k$"; repaired below.
@Article{Ceccarello:2022:FSM,
  author =       "Matteo Ceccarello and Johann Gamper",
  title =        "Fast and Scalable Mining of Time Series Motifs with
                 Probabilistic Guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3841--3853",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565840",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565840",
  abstract =     "Mining time series motifs is a fundamental, yet
                 expensive task in exploratory data analytics. In this
                 paper, we therefore propose a fast method to find the
                 top-$k$ motifs with probabilistic guarantees. Our
                 probabilistic approach is based on Locality \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Research paper: FEDEX, an explainability framework for data exploration steps.
@Article{Deutch:2022:FEF,
  author =       "Daniel Deutch and Amir Gilad and Tova Milo and Amit
                 Mualem and Amit Somech",
  title =        "{FEDEX}: an Explainability Framework for Data
                 Exploration Steps",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3854--3868",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565841",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565841",
  abstract =     "When exploring a new dataset, Data Scientists often
                 apply analysis queries, look for insights in the
                 resulting dataframe, and repeat to apply further
                 queries. We propose in this paper a novel solution that
                 assists data scientists in this laborious \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Research paper: transparent acceleration of Big Data frameworks on
%%% heterogeneous hardware (GPUs/FPGAs, per abstract).
@Article{Xekalaki:2022:ETA,
  author =       "Maria Xekalaki and Juan Fumero and Athanasios
                 Stratikopoulos and Katerina Doka and Christos
                 Katsakioris and Constantinos Bitsakos and Nectarios
                 Koziris and Christos Kotselidis",
  title =        "Enabling Transparent Acceleration of Big Data
                 Frameworks Using Heterogeneous Hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3869--3882",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565842",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565842",
  abstract =     "The ever-increasing demand for high performance Big
                 Data analytics and data processing, has paved the way
                 for heterogeneous hardware accelerators, such as
                 Graphics Processing Units (GPUs) and Field Programmable
                 Gate Arrays (FPGAs), to be integrated into \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Research paper: polarization niches in social media via dense subgraphs
%%% with attractors and repulsers.
@Article{Fazzone:2022:DPN,
  author =       "Adriano Fazzone and Tommaso Lanciano and Riccardo
                 Denni and Charalampos E. Tsourakakis and Francesco
                 Bonchi",
  title =        "Discovering Polarization Niches via Dense Subgraphs
                 with Attractors and Repulsers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3883--3896",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565843",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565843",
  abstract =     "Detecting niches of polarization in social media is a
                 first step towards deploying mitigation strategies and
                 avoiding radicalization. In this paper, we model
                 polarization niches as close-knit dense communities of
                 users, which are under the influence of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Research paper: Sage, a system for uncertain network analysis.
@Article{Lee:2022:SSU,
  author =       "Eunjae Lee and Sam H. Noh and Jiwon Seo",
  title =        "{Sage}: a System for Uncertain Network Analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3897--3910",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565844",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565844",
  abstract =     "We propose Sage, a system for uncertain network
                 analysis. Algorithms for uncertain network analysis
                 require large amounts of memory and computing resources
                 as they sample a large number of network instances and
                 run analysis on them. Sage makes uncertain \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Research paper: mining bursting cores in large temporal graphs.
@Article{Qin:2022:MBC,
  author =       "Hongchao Qin and Rong-Hua Li and Ye Yuan and Guoren
                 Wang and Lu Qin and Zhiwei Zhang",
  title =        "Mining Bursting Core in Large Temporal Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3911--3923",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565845",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565845",
  abstract =     "Temporal graphs are ubiquitous. Mining communities
                 that are bursting in a period of time is essential for
                 seeking real emergency events in temporal graphs.
                 Unfortunately, most previous studies on community
                 mining in temporal networks ignore the bursting
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Research paper: hybrid cost-based/learning-based query optimizer for plan selection.
%%% "?:" in the title is this file's convention for joining title and subtitle.
@Article{Yu:2022:CBL,
  author =       "Xiang Yu and Chengliang Chai and Guoliang Li and
                 Jiabin Liu",
  title =        "Cost-Based or Learning-Based?: a Hybrid Query
                 Optimizer for Query Plan Selection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3924--3936",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565846",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565846",
  abstract =     "Traditional cost-based optimizers are efficient and
                 stable to generate optimal plans for simple SQL
                 queries, but they may not generate high-quality plans
                 for complicated queries. Thus learning-based optimizers
                 have been proposed recently that can learn \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% NOTE(review): the ACM abstract excerpt is cut off mid-word ("user-");
%%% removed the stray ". " extraction artifact before \ldots{}. The unusual
%%% capitalization "ONe" in the title is intentional (spells the ONIAK acronym).
@Article{Meng:2022:OIA,
  author =       "Jingfan Meng and Huayi Wang and Jun Xu and Mitsunori
                 Ogihara",
  title =        "{ONe Index for All Kernels (ONIAK)}: a Zero
                 Re-Indexing {LSH} Solution to {ANNS-ALT (After Linear
                 Transformation)}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3937--3949",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565847",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565847",
  abstract =     "In this work, we formulate and solve a new type of
                 approximate nearest neighbor search (ANNS) problems
                 called ANNS after linear transformation (ALT). In
                 ANNS-ALT, we search for the vector (in a dataset) that,
                 after being linearly transformed by a user-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shi:2022:LIB,
  author =       "Jiachen Shi and Gao Cong and Xiao-Li Li",
  title =        "Learned Index Benefits: Machine Learning Based Index
                 Performance Estimation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3950--3962",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565848",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565848",
  abstract =     "Index selection remains one of the most challenging
                 problems in relational database management systems. To
                 find an optimum index configuration for a workload,
                 accurately and efficiently quantifying the benefits of
                 each candidate index configuration is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:ORM,
  author =       "Jiachuan Wang and Peng Cheng and Libin Zheng and Lei
                 Chen and Wenjie Zhang",
  title =        "Online Ridesharing with Meeting Points",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3963--3975",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565849",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565849",
  abstract =     "Nowadays, ridesharing becomes a popular commuting
                 mode. Dynamically arriving riders post their origins
                 and destinations, then the platform assigns drivers to
                 serve them. In ridesharing, different groups of riders
                 can be served by one driver if their \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bellomarini:2022:EPE,
  author =       "Luigi Bellomarini and Davide Benedetto and Matteo
                 Brandetti and Emanuel Sallinger",
  title =        "Exploiting the Power of Equality-Generating
                 Dependencies in Ontological Reasoning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3976--3988",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565850",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565850",
  abstract =     "Equality-generating dependencies (EGDs) allow to fully
                 exploit the power of existential quantification in
                 ontological reasoning settings modeled via
                 Tuple-Generating Dependencies (TGDs), by enabling
                 value-assignment or forcing the equivalence of fresh
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Aamand:2022:NRF,
  author =       "Anders Aamand and Debarati Das and Evangelos
                 Kipouridis and Jakob B. T. Knudsen and Peter M. R.
                 Rasmussen and Mikkel Thorup",
  title =        "No Repetition: Fast and Reliable Sampling with Highly
                 Concentrated Hashing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "3989--4001",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565851",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565851",
  abstract =     "Stochastic sample-based estimators are among the most
                 fundamental and universally applied tools in
                 statistics. Such estimators are particularly important
                 when processing huge amounts of data, where we need to
                 be able to answer a wide range of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Attouche:2022:WGJ,
  author =       "Lyes Attouche and Mohamed-Amine Baazizi and Dario
                 Colazzo and Giorgio Ghelli and Carlo Sartiani and
                 Stefanie Scherzinger",
  title =        "Witness Generation for {JSON} Schema",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4002--4014",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565852",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565852",
  abstract =     "JSON Schema is a schema language for JSON documents,
                 based on a complex combination of structural operators,
                 Boolean operators (negation included), and recursive
                 variables. The static analysis of JSON Schema documents
                 comprises practically relevant \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shankar:2022:TOP,
  author =       "Shreya Shankar and Aditya G. Parameswaran",
  title =        "Towards Observability for Production Machine Learning
                 Pipelines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4015--4022",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565853",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565853",
  abstract =     "Software organizations are increasingly incorporating
                 machine learning (ML) into their product offerings,
                 driving a need for new data management tools. Many of
                 these tools facilitate the initial development of ML
                 applications, but sustaining these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2022:DES,
  author =       "Sekwon Lee and Soujanya Ponnapalli and Sharad Singhal
                 and Marcos K. Aguilera and Kimberly Keeton and Vijay
                 Chidambaram",
  title =        "{DINOMO}: an Elastic, Scalable, High-Performance
                 Key-Value Store for Disaggregated Persistent Memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4023--4037",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565854",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565854",
  abstract =     "We present Dinomo, a novel key-value store for
                 disaggregated persistent memory (DPM). Dinomo is the
                 first key-value store for DPM that simultaneously
                 achieves high common-case performance, scalability, and
                 lightweight online reconfiguration. We observe
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shankar:2022:BCR,
  author =       "Shreya Shankar and Stephen Macke and Sarah Chasins and
                 Andrew Head and Aditya Parameswaran",
  title =        "Bolt-on, Compact, and Rapid Program Slicing for
                 Notebooks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4038--4047",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565855",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565855",
  abstract =     "Computational notebooks are commonly used for
                 iterative workflows, such as in exploratory data
                 analysis. This process lends itself to the accumulation
                 of old code and hidden state, making it hard for users
                 to reason about the lineage of, e.g., plots \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2022:FMT,
  author =       "Weijie Sun and Zihuan Xu and Lei Chen",
  title =        "Fairness Matters: a Tit-for-Tat Strategy Against
                 Selfish Mining",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4048--4061",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565856",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565856",
  abstract =     "The proof-of-work (PoW) based blockchains are more
                 secure nowadays since profit-oriented miners contribute
                 more computing powers in exchange for fair revenues.
                 This virtuous circle only works under an
                 incentive-compatible consensus, which is found to be
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ding:2022:SIO,
  author =       "Jialin Ding and Ryan Marcus and Andreas Kipf and
                 Vikram Nathan and Aniruddha Nrusimha and Kapil Vaidya
                 and Alexander van Renen and Tim Kraska",
  title =        "{SageDB}: an Instance-Optimized Data Analytics
                 System",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4062--4078",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565857",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565857",
  abstract =     "Modern data systems are typically both complex and
                 general-purpose. They are complex because of the
                 numerous internal knobs and parameters that users need
                 to manually tune in order to achieve good performance;
                 they are general-purpose because they are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Richly:2022:BCF,
  author =       "Keven Richly and Rainer Schlosser and Martin
                 Boissier",
  title =        "Budget-Conscious Fine-Grained Configuration
                 Optimization for Spatio-Temporal Applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4079--4092",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565858",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565858",
  abstract =     "Based on the performance requirements of modern
                 spatio-temporal data mining applications, in-memory
                 database systems are often used to store and process
                 the data. To efficiently utilize the scarce DRAM
                 capacities, modern database systems support various
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hsieh:2022:NGC,
  author =       "Cheng-Yu Hsieh and Jieyu Zhang and Alexander Ratner",
  title =        "{Nemo}: Guiding and Contextualizing Weak Supervision
                 for Interactive Data Programming",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "15",
  number =       "13",
  pages =        "4093--4105",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565838.3565859",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:02 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565838.3565859",
  abstract =     "Weak Supervision (WS) techniques allow users to
                 efficiently create large training datasets by
                 programmatically labeling data with heuristic sources
                 of supervision. While the success of WS relies heavily
                 on the provided labeling heuristics, the process
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Helt:2022:CCC,
  author =       "Jeffrey Helt and Abhinav Sharma and Daniel J. Abadi
                 and Wyatt Lloyd and Jose M. Faleiro",
  title =        "{C5}: cloned concurrency control that always keeps
                 up",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "1--14",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561262",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561262",
  abstract =     "Asynchronously replicated primary-backup databases are
                 commonly deployed to improve availability and offload
                 read-only transactions. To both apply replicated writes
                 from the primary and serve read-only transactions, the
                 backups implement a cloned \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:CDS,
  author =       "Ruihong Wang and Jianguo Wang and Stratos Idreos and
                 M. Tamer {\"O}zsu and Walid G. Aref",
  title =        "The case for distributed shared-memory databases with
                 {RDMA}-enabled memory disaggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "15--22",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561263",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561263",
  abstract =     "Memory disaggregation (MD) allows for scalable and
                 elastic data center design by separating compute (CPU)
                 from memory. With MD, compute and memory are no longer
                 coupled into the same server box. Instead, they are
                 connected to each other via ultra-fast \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2022:FED,
  author =       "Chenyuan Wu and Mohammad Javad Amiri and Jared Asch
                 and Heena Nagda and Qizhen Zhang and Boon Thau Loo",
  title =        "{FlexChain}: an elastic disaggregated blockchain",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "23--36",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561264",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561264",
  abstract =     "While permissioned blockchains enable a family of data
                 center applications, existing systems suffer from
                 imbalanced loads across compute and memory,
                 exacerbating the underutilization of cloud resources.
                 This paper presents FlexChain, a novel \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:MNL,
  author =       "Zhen Zhang and Shuai Zheng and Yida Wang and Justin
                 Chiu and George Karypis and Trishul Chilimbi and Mu Li
                 and Xin Jin",
  title =        "{MiCS}: near-linear scaling for training gigantic
                 model on public cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "37--50",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561265",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561265",
  abstract =     "Existing general purpose frameworks for gigantic model
                 training, i.e., dense models with billions of
                 parameters, cannot scale efficiently on cloud
                 environment with various networking conditions due to
                 large communication overheads. In this paper, we
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2022:PPC,
  author =       "Yi Yang and Yurong Cheng and Ye Yuan and Guoren Wang
                 and Lei Chen and Yongjiao Sun",
  title =        "Privacy-preserving cooperative online matching over
                 spatial crowdsourcing platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "51--63",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561266",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561266",
  abstract =     "With the continuous development of spatial
                 crowdsourcing platform, online task assignment problem
                 has been widely studied as a typical problem in spatial
                 crowdsourcing. Most of the existing studies are based
                 on a single-platform task assignment to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:CMT,
  author =       "Jiayi Wang and Chengliang Chai and Nan Tang and Jiabin
                 Liu and Guoliang Li",
  title =        "Coresets over multiple tables for feature-rich and
                 data-efficient machine learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "64--76",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561267",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561267",
  abstract =     "Successful machine learning (ML) needs to learn from
                 good data. However, one common issue about train data
                 for ML practitioners is the lack of good features. To
                 mitigate this problem, feature augmentation is often
                 employed by joining with (or enriching) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:SMM,
  author =       "Zihao Zhang and Huiqi Hu and Xuan Zhou and Jiang
                 Wang",
  title =        "{Starry}: multi-master transaction processing on
                 semi-leader architecture",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "77--89",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561268",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561268",
  abstract =     "Multi-master architecture is desirable for cloud
                 databases in supporting large-scale transaction
                 processing. To enable concurrent transaction execution
                 on multiple computing nodes, we need an efficient
                 transaction commit protocol on the storage layer
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Skitsas:2022:SSE,
  author =       "Konstantinos Skitsas and Ioannis G. Papageorgiou and
                 Mohammad Sadegh Talebi and Verena Kantere and Michael
                 N. Katehakis and Panagiotis Karras",
  title =        "{SIFTER}: space-efficient value iteration for
                 finite-horizon {MDPs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "90--98",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561269",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561269",
  abstract =     "Can we solve finite-horizon Markov decision processes
                 (FHMDPs) while raising low memory requirements? Such
                 models find application in many cases where a
                 decision-making agent needs to act in a probabilistic
                 environment, from resource management to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yu:2022:TUP,
  author =       "Geoffrey X. Yu and Markos Markakis and Andreas Kipf
                 and Per-{\AA}ke Larson and Umar Farooq Minhas and Tim
                 Kraska",
  title =        "{TreeLine}: an update-in-place key-value store for
                 modern storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "99--112",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561270",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561270",
  abstract =     "Many modern key-value stores, such as RocksDB, rely on
                 log-structured merge trees (LSMs). Originally designed
                 for spinning disks, LSMs optimize for write performance
                 by only making sequential writes. But this optimization
                 comes at the cost of reads: \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tao:2022:DPE,
  author =       "Yuchao Tao and Amir Gilad and Ashwin Machanavajjhala
                 and Sudeepa Roy",
  title =        "{DPXPlain}: privately explaining aggregate query
                 answers",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "1",
  pages =        "113--126",
  month =        sep,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3561261.3561271",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Thu Nov 17 11:06:34 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3561261.3561271",
  abstract =     "Differential privacy (DP) is the state-of-the-art and
                 rigorous notion of privacy for answering aggregate
                 database queries while preserving the privacy of
                 sensitive information in the data. In today's era of
                 data analysis, however, it poses new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chang:2022:EMP,
  author =       "Lijun Chang and Mouyi Xu and Darren Strash",
  title =        "Efficient maximum $k$-plex computation over large
                 sparse graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "127--139",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565817",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565817",
  abstract =     "The $k$-plex model is a relaxation of the clique
                 model by allowing every vertex to miss up to $k$
                 neighbors. Designing exact and efficient algorithms for
                 computing a maximum $k$-plex in a graph has been
                 receiving increasing interest recently. However, the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hu:2022:OSE,
  author =       "Tianxun Hu and Tianzheng Wang and Qingqing Zhou",
  title =        "Online schema evolution is (almost) free for snapshot
                 databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "140--153",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565818",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565818",
  abstract =     "Modern database applications often change their
                 schemas to keep up with the changing requirements.
                 However, support for online and transactional schema
                 evolution remains challenging in existing database
                 systems. Specifically, prior work often takes ad
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:LEH,
  author =       "Yifan Wang and Haodi Ma and Daisy Zhe Wang",
  title =        "{LIDER}: an efficient high-dimensional learned index
                 for large-scale dense passage retrieval",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "154--166",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565819",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565819",
  abstract =     "Passage retrieval has been studied for decades, and
                 many recent approaches of passage retrieval are using
                 dense embeddings generated from deep neural models,
                 called ``dense passage retrieval''. The
                 state-of-the-art end-to-end dense passage retrieval
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shaham:2022:MMS,
  author =       "Sina Shaham and Gabriel Ghinita and Cyrus Shahabi",
  title =        "Models and mechanisms for spatial data fairness",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "167--179",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565820",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565820",
  abstract =     "Fairness in data-driven decision-making studies
                 scenarios where individuals from certain population
                 segments may be unfairly treated when being considered
                 for loan or job applications, access to public
                  resources, or other types of services. In location-
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2022:IMR,
  author =       "Shixun Huang and Wenqing Lin and Zhifeng Bao and
                 Jiachen Sun",
  title =        "Influence maximization in real-world closed social
                 networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "180--192",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565821",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565821",
  abstract =     "In the last few years, many closed social networks
                  such as WhatsApp and WeChat have emerged to cater for
                 people's growing demand of privacy and independence. In
                 a closed social network, the posted content is not
                 available to all users or senders can set \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bonifati:2022:TLI,
  author =       "Angela Bonifati and Francesco {Del Buono} and
                 Francesco Guerra and Donato Tiano",
  title =        "{Time2Feat}: learning interpretable representations
                 for multivariate time series clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "193--201",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565822",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565822",
  abstract =     "Clustering multivariate time series is a critical task
                 in many real-world applications involving multiple
                 signals and sensors. Existing systems aim to maximize
                 effectiveness, efficiency and scalability, but fail to
                 guarantee the interpretability of the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2022:OVF,
  author =       "Xiaochen Li and Yuke Hu and Weiran Liu and Hanwen Feng
                 and Li Peng and Yuan Hong and Kui Ren and Zhan Qin",
  title =        "{OpBoost}: a vertical federated tree boosting
                 framework based on order-preserving desensitization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "202--215",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565823",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565823",
  abstract =     "Vertical Federated Learning (FL) is a new paradigm
                 that enables users with non-overlapping attributes of
                 the same data samples to jointly train a model without
                 directly sharing the raw data. Nevertheless, recent
                 works show that it's still not sufficient \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Perera:2022:HSD,
  author =       "R. Malinga Perera and Bastian Oetomo and Benjamin I.
                 P. Rubinstein and Renata Borovica-Gajic",
  title =        "{HMAB}: self-driving hierarchy of bandits for
                 integrated physical database design tuning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "216--229",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565824",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565824",
  abstract =     "Effective physical database design tuning requires
                 selection of several physical design structures (PDS),
                 such as indices and materialised views, whose
                 combination influences overall system performance in a
                 non-linear manner. While the simplicity of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Palyvos-Giannas:2022:EEO,
  author =       "Dimitris Palyvos-Giannas and Katerina Tzompanaki and
                 Marina Papatriantafilou and Vincenzo Gulisano",
  title =        "{Erebus}: explaining the outputs of data streaming
                 queries",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "230--242",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565825",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565825",
  abstract =     "In data streaming, why-provenance can explain why a
                 given outcome is observed but offers no help in
                 understanding why an expected outcome is missing.
                 Explaining missing answers has been addressed in DBMSs,
                 but these solutions are not directly applicable
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2022:PPL,
  author =       "Zhou Zhang and Zhaole Chu and Peiquan Jin and Yongping
                 Luo and Xike Xie and Shouhong Wan and Yun Luo and Xufei
                 Wu and Peng Zou and Chunyang Zheng and Guoan Wu and
                 Andy Rudoff",
  title =        "{PLIN}: a persistent learned index for non-volatile
                 memory with high performance and instant recovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "243--255",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565826",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565826",
  abstract =     "Non-Volatile Memory (NVM) has emerged as an
                 alternative to next-generation main memories. Although
                 many tree indices have been proposed for NVM, they
                 generally use B+-tree-like structures. To further
                 improve the performance of NVM-aware indices, we
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:FFC,
  author =       "Zuozhi Wang and Shengquan Ni and Avinash Kumar and
                 Chen Li",
  title =        "{Fries}: fast and consistent runtime reconfiguration
                 in dataflow systems with transactional guarantees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "256--268",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565827",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565827",
  abstract =     "A computing job in a big data system can take a long
                 time to run, especially for pipelined executions on
                 data streams. Developers often need to change the
                 computing logic of the job such as fixing a loophole in
                 an operator or changing the machine \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xiao:2022:FAD,
  author =       "Renjie Xiao and Zijing Tan and Haojin Wang and Shuai
                 Ma",
  title =        "Fast approximate denial constraint discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "269--281",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565828",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565828",
  abstract =     "We investigate the problem of discovering approximate
                 denial constraints (DCs), for finding DCs that hold
                 with some exceptions to avoid overfitting real-life
                 dirty data and facilitate data cleaning tasks.
                 Different methods have been proposed to address
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:FDD,
  author =       "Haoyu Wang and Shaoxu Song",
  title =        "Frequency domain data encoding in {Apache IoTDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "282--290",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565829",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565829",
  abstract =     "Frequency domain analysis is widely conducted on time
                 series. While online transforming from time domain to
                 frequency domain is costly, e.g., by Fast Fourier
                 Transform (FFT), it is highly demanded to store the
                 frequency domain data for reuse. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zheng:2022:HMS,
  author =       "Jiping Zheng and Yuan Ma and Wei Ma and Yanhao Wang
                 and Xiaoyang Wang",
  title =        "Happiness maximizing sets under group fairness
                 constraints",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "291--303",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565830",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565830",
  abstract =     "Finding a happiness maximizing set (HMS) from a
                 database, i.e., selecting a small subset of tuples that
                 preserves the best score with respect to any
                 nonnegative linear utility function, is an important
                 problem in multi-criteria decision-making. When an
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Renggli:2022:SEF,
  author =       "Cedric Renggli and Xiaozhe Yao and Luka Kolar and Luka
                 Rimanic and Ana Klimovic and Ce Zhang",
  title =        "{SHiFT}: an efficient, flexible search engine for
                 transfer learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "304--316",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565831",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565831",
  abstract =     "Transfer learning can be seen as a data- and
                 compute-efficient alternative to training models from
                 scratch. The emergence of rich model repositories, such
                 as TensorFlow Hub, enables practitioners and
                 researchers to unleash the potential of these models
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Islam:2022:SCT,
  author =       "Md. Mouinul Islam and Dong Wei and Baruch Schieber and
                 Senjuti Basu Roy",
  title =        "Satisfying complex top-$k$ fairness constraints by
                 preference substitutions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "317--329",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565832",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565832",
  abstract =     "Given m users (voters), where each user casts her
                 preference for a single item (candidate) over n items
                 (candidates) as a ballot, the preference aggregation
                 problem returns k items (candidates) that have the k
                 highest number of preferences (votes). Our \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Karpov:2022:SSE,
  author =       "Nikolai Karpov and Qin Zhang",
  title =        "{SyncSignature}: a simple, efficient, parallelizable
                 framework for tree similarity joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "330--342",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565833",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565833",
  abstract =     "This paper introduces SyncSignature, the first fully
                 parallelizable algorithmic framework for tree
                 similarity joins under edit distance. SyncSignature
                 makes use of implicit-synchronized signature generation
                 schemes, which allow for an efficient and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2022:APG,
  author =       "Shuang Yang and Yahui Sun and Jiesong Liu and Xiaokui
                 Xiao and Rong-Hua Li and Zhewei Wei",
  title =        "Approximating probabilistic group {Steiner} trees in
                 graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "343--355",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565834",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565834",
  abstract =     "Consider an edge-weighted graph, and a number of
                 properties of interests (PoIs). Each vertex has a
                 probability of exhibiting each PoI. The joint
                 probability that a set of vertices exhibits a PoI is
                 the probability that this set contains at least one
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Papadias:2022:SER,
  author =       "Serafeim Papadias and Zoi Kaoudi and Jorge-Arnulfo
                 Quian{\'e}-Ruiz and Volker Markl",
  title =        "Space-efficient random walks on streaming graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "356--368",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565835",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565835",
  abstract =     "Graphs in many applications, such as social networks
                 and IoT, are inherently streaming, involving continuous
                 additions and deletions of vertices and edges at high
                 rates. Constructing random walks in a graph, i.e.,
                 sequences of vertices selected with a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:PPT,
  author =       "Pengfei Wang and Xiaocan Zeng and Lu Chen and Fan Ye
                 and Yuren Mao and Junhao Zhu and Yunjun Gao",
  title =        "{PromptEM}: prompt-tuning for low-resource generalized
                 entity matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "369--378",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565836",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565836",
  abstract =     "Entity Matching (EM), which aims to identify whether
                 two entity records from two relational tables refer to
                 the same real-world entity, is one of the fundamental
                 problems in data management. Traditional EM assumes
                 that two tables are homogeneous with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Guo:2022:CAC,
  author =       "Zhihan Guo and Xinyu Zeng and Kan Wu and Wuh-Chwen
                 Hwang and Ziwei Ren and Xiangyao Yu and Mahesh
                 Balakrishnan and Philip A. Bernstein",
  title =        "{Cornus}: atomic commit for a cloud {DBMS} with
                 storage disaggregation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "2",
  pages =        "379--392",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3565816.3565837",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Nov 25 08:53:26 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3565816.3565837",
  abstract =     "Two-phase commit (2PC) is widely used in distributed
                 databases to ensure atomicity of distributed
                 transactions. Conventional 2PC was originally designed
                 for the shared-nothing architecture and has two
                 limitations: long latency due to two eager log
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yuan:2022:RTT,
  author =       "Haitao Yuan and Guoliang Li and Zhifeng Bao",
  title =        "Route Travel Time Estimation on a Road Network
                 Revisited: Heterogeneity, Proximity, Periodicity and
                 Dynamicity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "393--405",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570691",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570691",
  abstract =     "In this paper, we revisit the problem of route travel
                 time estimation on a road network and aim to boost its
                 accuracy by capturing and utilizing spatio-temporal
                 features from four significant aspects: heterogeneity,
                 proximity, periodicity and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2022:SOM,
  author =       "Yongji Wu and Matthew Lentz and Danyang Zhuo and Yao
                 Lu",
  title =        "Serving and Optimizing Machine Learning Workflows on
                 Heterogeneous Infrastructures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "406--419",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570692",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570692",
  abstract =     "With the advent of ubiquitous deployment of smart
                 devices and the Internet of Things, data sources for
                 machine learning inference have increasingly moved to
                 the edge of the network. Existing machine learning
                 inference platforms typically assume a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Geng:2022:CRB,
  author =       "Zixuan Geng and Maximilian Schleich and Dan Suciu",
  title =        "Computing Rule-Based Explanations by Leveraging
                 Counterfactuals",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "420--432",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570693",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570693",
  abstract =     "Sophisticated machine models are increasingly used for
                 high-stakes decisions in everyday life. There is an
                 urgent need to develop effective explanation techniques
                 for such automated decisions. Rule-Based Explanations
                 have been proposed for high-stake \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Peng:2022:SSI,
  author =       "Jinfeng Peng and Derong Shen and Nan Tang and Tieying
                 Liu and Yue Kou and Tiezheng Nie and Hang Cui and Ge
                 Yu",
  title =        "Self-Supervised and Interpretable Data Cleaning with
                 Sequence Generative Adversarial Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "433--446",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570694",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570694",
  abstract =     "We study the problem of self-supervised and
                 interpretable data cleaning, which automatically
                 extracts interpretable data repair rules from dirty
                 data. In this paper, we propose a novel framework,
                 namely Garf, based on sequence generative adversarial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Romero:2022:OVA,
  author =       "Francisco Romero and Johann Hauswald and Aditi Partap
                 and Daniel Kang and Matei Zaharia and Christos
                 Kozyrakis",
  title =        "Optimizing Video Analytics with Declarative Model
                 Relationships",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "447--460",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570695",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570695",
  abstract =     "The availability of vast video collections and the
                 accuracy of ML models has generated significant
                 interest in video analytics systems. Since naively
                 processing all frames using expensive models is
                 impractical, researchers have proposed optimizations
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jiang:2022:SRT,
  author =       "Jiaxin Jiang and Yuan Li and Bingsheng He and Bryan
                 Hooi and Jia Chen and Johan Kok Zhi Kang",
  title =        "{Spade}: a Real-Time Fraud Detection Framework on
                 Evolving Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "461--469",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570696",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570696",
  abstract =     "Real-time fraud detection is a challenge for most
                 financial and electronic commercial platforms. To
                 identify fraudulent communities, Grab, one of the
                 largest technology companies in Southeast Asia, forms a
                 graph from a set of transactions and detects \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Miao:2022:GET,
  author =       "Xupeng Miao and Yujie Wang and Youhe Jiang and Chunan
                 Shi and Xiaonan Nie and Hailin Zhang and Bin Cui",
  title =        "{Galvatron}: Efficient Transformer Training over
                 Multiple {GPUs} Using Automatic Parallelism",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "470--479",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570697",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570697",
  abstract =     "Transformer models have achieved state-of-the-art
                 performance on various domains of applications and
                 gradually becomes the foundations of the advanced large
                 deep learning (DL) models. However, how to train these
                 models over multiple GPUs efficiently is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2022:IDL,
  author =       "Qitong Wang and Stephen Whitmarsh and Vincent Navarro
                 and Themis Palpanas",
  title =        "{iEDeaL}: a Deep Learning Framework for Detecting
                 Highly Imbalanced Interictal Epileptiform Discharges",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "480--490",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570698",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570698",
  abstract =     "Epilepsy is a chronic neurological disease, ranked as
                 the second most burdensome neurological disorder
                 worldwide. Detecting Interictal Epileptiform Discharges
                 (IEDs) is among the most important clinician operations
                 to support epilepsy diagnosis, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zapridou:2022:DLP,
  author =       "Eleni Zapridou and Ioannis Mytilinis and Anastasia
                 Ailamaki",
  title =        "{Dalton}: Learned Partitioning for Distributed Data
                 Streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "491--504",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570699",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570699",
  abstract =     "To sustain the input rate of high-throughput streams,
                 modern stream processing systems rely on parallel
                 execution. However, skewed data yield imbalanced load
                 assignments and create stragglers that hinder
                  scalability. Deciding on a static partitioning for
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Behrouz:2022:FCS,
  author =       "Ali Behrouz and Farnoosh Hashemi and Laks V. S.
                 Lakshmanan",
  title =        "{FirmTruss} Community Search in Multilayer Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "505--518",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570700",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570700",
  abstract =     "In applications such as biological, social, and
                 transportation networks, interactions between objects
                 span multiple aspects. For accurately modeling such
                 applications, multilayer networks have been proposed.
                 Community search allows for personalized \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xu:2022:ETC,
  author =       "Tianyang Xu and Zhao Lu and Yuanyuan Zhu",
  title =        "Efficient Triangle-Connected Truss Community Search in
                 Dynamic Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "519--531",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570701",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570701",
  abstract =     "Community search studies the retrieval of certain
                 community structures containing query vertices, which
                  has received lots of attention recently. k-truss is a
                  fundamental community structure where each edge is
                  contained in at least k - 2 triangles. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sabek:2022:CLM,
  author =       "Ibrahim Sabek and Kapil Vaidya and Dominik Horn and
                 Andreas Kipf and Michael Mitzenmacher and Tim Kraska",
  title =        "Can Learned Models Replace Hash Functions?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "532--545",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570702",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570702",
  abstract =     "Hashing is a fundamental operation in database
                 management, playing a key role in the implementation of
                 numerous core database data structures and algorithms.
                 Traditional hash functions aim to mimic a function that
                 maps a key to a random value, which can \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2022:TGA,
  author =       "Yue Zhao and George H. Chen and Zhihao Jia",
  title =        "{TOD}: {GPU}-Accelerated Outlier Detection via Tensor
                 Operations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "546--560",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570703",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570703",
  abstract =     "Outlier detection (OD) is a key machine learning task
                 for finding rare and deviant data samples, with many
                 time-critical applications such as fraud detection and
                 intrusion detection. In this work, we propose TOD, the
                 first tensor-based system for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ma:2022:FFL,
  author =       "Chaohong Ma and Xiaohui Yu and Yifan Li and Xiaofeng
                 Meng and Aishan Maoliniyazi",
  title =        "{FILM}: a Fully Learned Index for Larger-Than-Memory
                 Databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "3",
  pages =        "561--573",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3570690.3570704",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:37 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3570690.3570704",
  abstract =     "As modern applications generate data at an
                 unprecedented speed and often require the
                 querying/analysis of data spanning a large duration, it
                 is crucial to develop indexing techniques that cater to
                 larger-than-memory databases, where data reside on
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mazmudar:2022:CMI,
  author =       "Miti Mazmudar and Thomas Humphries and Jiaxiang Liu
                 and Matthew Rafuse and Xi He",
  title =        "Cache Me If You Can: Accuracy-Aware Inference Engine
                 for Differentially Private Data Exploration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "574--586",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574246",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574246",
  abstract =     "Differential privacy (DP) allows data analysts to
                 query databases that contain users' sensitive
                 information while providing a quantifiable privacy
                 guarantee to users. Recent interactive DP systems such
                 as APEx provide accuracy guarantees over the query
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Falzon:2022:RSE,
  author =       "Francesca Falzon and Evangelia Anna Markatou and
                 Zachary Espiritu and Roberto Tamassia",
  title =        "Range Search over Encrypted Multi-Attribute Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "587--600",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574247",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574247",
  abstract =     "This work addresses expressive queries over encrypted
                 data by presenting the first systematic study of
                 multi-attribute range search on a symmetrically
                 encrypted database outsourced to an honest-but-curious
                 server. Prior work includes a thorough analysis
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ren:2022:HMA,
  author =       "Xuanle Ren and Le Su and Zhen Gu and Sheng Wang and
                 Feifei Li and Yuan Xie and Song Bian and Chao Li and
                 Fan Zhang",
  title =        "{HEDA}: Multi-Attribute Unbounded Aggregation over
                 Homomorphically Encrypted Database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "601--614",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574248",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574248",
  abstract =     "Recent years have witnessed the rapid development of
                 the encrypted database, due to the increasing number of
                 data privacy breaches and the corresponding laws and
                 regulations that caused millions of dollars in loss.
                 These encrypted databases may rely on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shen:2022:DPG,
  author =       "Chih-Ya Shen and Shao-Heng Ko and Guang-Siang Lee and
                 Wang-Chien Lee and De-Nian Yang",
  title =        "Density Personalized Group Query",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "615--628",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574249",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574249",
  abstract =     "Research on new queries for finding dense subgraphs
                 and groups has been actively pursued due to their many
                 applications, especially in social network analysis and
                 graph mining. However, existing work faces two major
                 weaknesses: (i) incapability of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Geng:2022:NDH,
  author =       "Jinkun Geng and Anirudh Sivaraman and Balaji Prabhakar
                 and Mendel Rosenblum",
  title =        "{Nezha}: Deployable and High-Performance Consensus
                 Using Synchronized Clocks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "629--642",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574250",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574250",
  abstract =     "This paper presents a high-performance consensus
                 protocol, Nezha, which can be deployed by cloud tenants
                 without support from cloud providers. Nezha bridges the
                 gap between protocols such as Multi-Paxos and Raft,
                 which can be readily deployed, and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ahmad:2022:PPR,
  author =       "Ishtiyaque Ahmad and Divyakant Agrawal and Amr {El
                 Abbadi} and Trinabh Gupta",
  title =        "{Pantheon}: Private Retrieval from Public Key--Value
                 Store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "643--656",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574251",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574251",
  abstract =     "Consider a cloud server that owns a key-value store
                 and provides a private query service to its clients.
                 Preserving client privacy in this setting is difficult
                 because the key-value store is public, and a client
                 cannot encrypt or modify it. Therefore, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{DaDalt:2022:BSV,
  author =       "Francesco {Da Dalt} and Simon Scherrer and Adrian
                 Perrig",
  title =        "{Bayesian} Sketches for Volume Estimation in Data
                 Streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "657--669",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574252",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574252",
  abstract =     "Given large data streams of items, each attributable
                 to a certain key and possessing a certain volume, the
                 aggregate volume associated with a key is difficult to
                 estimate in a way that is both efficient and accurate.
                 On the one hand, exact counting with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Moti:2022:WWA,
  author =       "Moin Hussain Moti and Panagiotis Simatis and Dimitris
                 Papadias",
  title =        "{Waffle}: a Workload-Aware and Query-Sensitive
                 Framework for Disk-Based Spatial Indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "670--683",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574253",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574253",
  abstract =     "Although several spatial indexes achieve fast query
                 processing, they are ineffective for highly dynamic
                 data sets because of costly updates. On the other hand,
                 simple structures that enable efficient updates are
                 slow for spatial queries. In this paper, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pena:2022:FAD,
  author =       "Eduardo H. M. Pena and Fabio Porto and Felix Naumann",
  title =        "Fast Algorithms for Denial Constraint Discovery",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "684--696",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574254",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574254",
  abstract =     "Denial constraints (DCs) are an integrity constraint
                 formalism widely used to detect inconsistencies in
                 data. Several algorithms have been devised to discover
                 DCs from data, as manually specifying them is
                 burdensome and, worse yet, error-prone. The \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jiao:2022:TQI,
  author =       "Pu Jiao and Sheng Di and Hanqi Guo and Kai Zhao and
                 Jiannan Tian and Dingwen Tao and Xin Liang and Franck
                 Cappello",
  title =        "Toward Quantity-of-Interest Preserving Lossy
                 Compression for Scientific Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "697--710",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574255",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574255",
  abstract =     "Today's scientific simulations and instruments are
                 producing a large amount of data, leading to
                 difficulties in storing, transmitting, and analyzing
                 these data. While error-controlled lossy compressors
                 are effective in significantly reducing data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Demirci:2022:SGC,
  author =       "Gunduz Vehbi Demirci and Aparajita Haldar and Hakan
                 Ferhatosmanoglu",
  title =        "Scalable Graph Convolutional Network Training on
                 Distributed-Memory Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "711--724",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574256",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574256",
  abstract =     "Graph Convolutional Networks (GCNs) are extensively
                 utilized for deep learning on graphs. The large data
                 sizes of graphs and their vertex features make scalable
                 training algorithms and distributed memory systems
                 necessary. Since the convolution \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schafer:2022:MSA,
  author =       "Patrick Sch{\"a}fer and Ulf Leser",
  title =        "{Motiflets}: Simple and Accurate Detection of Motifs
                 in Time Series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "725--737",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574257",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574257",
  abstract =     "A time series motif intuitively is a short time series
                 that repeats itself approximately the same within a
                 larger time series. Such motifs often represent
                 concealed structures, such as heart beats in an ECG
                 recording, the riff in a pop song, or sleep \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Narayan:2022:CFM,
  author =       "Avanika Narayan and Ines Chami and Laurel Orr and
                 Christopher R{\'e}",
  title =        "Can Foundation Models Wrangle Your Data?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "738--746",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574258",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574258",
  abstract =     "Foundation Models (FMs) are models trained on large
                 corpora of data that, at very large scale, can
                 generalize to new tasks without any task-specific
                 finetuning. As these models continue to grow in size,
                 innovations continue to push the boundaries of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kim:2022:MDB,
  author =       "Bogyeong Kim and Kyoseung Koo and Undraa Enkhbat and
                 Sohyun Kim and Juhun Kim and Bongki Moon",
  title =        "{M2Bench}: a Database Benchmark for Multi-Model
                 Analytic Workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "747--759",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574259",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574259",
  abstract =     "As the world becomes increasingly data-centric, the
                 tasks dealt with by a database management system (DBMS)
                 become more complex and diverse. Compared with
                 traditional workloads that typically require only a
                 single data model, modern-day computational \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Baruah:2022:POD,
  author =       "Nirvik Baruah and Peter Kraft and Fiodar Kazhamiaka
                 and Peter Bailis and Matei Zaharia",
  title =        "Parallelism-Optimizing Data Placement for Faster
                 Data-Parallel Computations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "760--771",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574260",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574260",
  abstract =     "Systems performing large data-parallel computations,
                 including online analytical processing (OLAP) systems
                 like Druid and search engines like Elasticsearch, are
                 increasingly being used for business-critical real-time
                 applications where providing low \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lazebnik:2022:SSB,
  author =       "Teddy Lazebnik and Amit Somech and Abraham Itzhak
                 Weinberg",
  title =        "{SubStrat}: a Subset-Based Optimization Strategy for
                 Faster {AutoML}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "772--780",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574261",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574261",
  abstract =     "Automated machine learning (AutoML) frameworks have
                 become important tools in the data scientist's arsenal,
                 as they dramatically reduce the manual work devoted to
                 the construction of ML pipelines. Such frameworks
                 intelligently search among millions of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gurukar:2022:MWS,
  author =       "Saket Gurukar and Nikil Pancha and Andrew Zhai and
                 Eric Kim and Samson Hu and Srinivasan Parthasarathy and
                 Charles Rosenberg and Jure Leskovec",
  title =        "{MultiBiSage}: a {Web}-Scale Recommendation System
                 Using Multiple Bipartite Graphs at {Pinterest}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "781--789",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574262",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574262",
  abstract =     "Graph Convolutional Networks (GCN) can efficiently
                 integrate graph structure and node features to learn
                 high-quality node embeddings. At Pinterest, we have
                 developed and deployed PinSage, a data-efficient GCN
                 that learns pin embeddings from the Pin-. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zeakis:2022:TEF,
  author =       "Alexandros Zeakis and Dimitrios Skoutas and Dimitris
                 Sacharidis and Odysseas Papapetrou and Manolis
                 Koubarakis",
  title =        "{TokenJoin}: Efficient Filtering for Set Similarity
                 Join with Maximum Weighted Bipartite Matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "790--802",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574263",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574263",
  abstract =     "Set similarity join is an important problem with many
                 applications in data discovery, cleaning and
                 integration. To increase robustness, fuzzy set
                 similarity join calculates the similarity of two sets
                 based on maximum weighted bipartite matching instead
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kayali:2022:QSC,
  author =       "Moe Kayali and Dan Suciu",
  title =        "Quasi-Stable Coloring for Graph Compression:
                 Approximating Max-Flow, Linear Programs, and
                 Centrality",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "803--815",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574264",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574264",
  abstract =     "We propose quasi-stable coloring, an approximate
                 version of stable coloring. Stable coloring, also
                 called color refinement, is a well-studied technique in
                 graph theory for classifying vertices, which can be
                 used to build compact, lossless \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pujol:2022:MAD,
  author =       "David Pujol and Albert Sun and Brandon Fain and Ashwin
                 Machanavajjhala",
  title =        "Multi-Analyst Differential Privacy for Online Query
                 Answering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "816--828",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574265",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574265",
  abstract =     "Most differentially private mechanisms are designed
                 for the use of a single analyst. In reality, however,
                 there are often multiple stakeholders with different
                 and possibly conflicting priorities that must share the
                 same privacy loss budget. This \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gubner:2022:EVM,
  author =       "Tim Gubner and Peter Boncz",
  title =        "{Excalibur}: a Virtual Machine for Adaptive
                 Fine-grained {JIT}-Compiled Query Execution based on
                 {VOILA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "829--841",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574266",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574266",
  abstract =     "In recent years, hardware has become increasingly
                 diverse, in terms of features as well as performance.
                 This poses a problem for complex software in general
                 and database systems in particular. To achieve
                 top-notch performance, we need to exploit \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Qin:2022:ADO,
  author =       "Lianke Qin and Rajesh Jayaram and Elaine Shi and Zhao
                 Song and Danyang Zhuo and Shumo Chu",
  title =        "{Adore}: Differentially Oblivious Relational Database
                 Operators",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "842--855",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574267",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574267",
  abstract =     "There has been a recent effort in applying
                 differential privacy on memory access patterns to
                 enhance data privacy. This is called differential
                 obliviousness. Differential obliviousness is a
                 promising direction because it provides a principled
                 trade-off \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Laddad:2022:KCC,
  author =       "Shadaj Laddad and Conor Power and Mae Milano and Alvin
                 Cheung and Natacha Crooks and Joseph M. Hellerstein",
  title =        "Keep {CALM} and {CRDT} On",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "856--863",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574268",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574268",
  abstract =     "Despite decades of research and practical experience,
                 developers have few tools for programming reliable
                 distributed applications without resorting to expensive
                 coordination techniques. Conflict-free replicated
                 datatypes (CRDTs) are a promising line of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lu:2022:MLS,
  author =       "Kejing Lu and Yoshiharu Ishikawa and Chuan Xiao",
  title =        "{MQH}: Locality Sensitive Hashing on Multi-level
                 Quantization Errors for Point-to-Hyperplane Distances",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "864--876",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574269",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574269",
  abstract =     "Point-to-hyperplane nearest neighbor search (P2HNNS)
                 is a fundamental problem which has many applications in
                 data mining and machine learning. In this paper, we
                 propose a provable Locality-Sensitive-Hashing (LSH)
                 scheme based on multi-level quantization \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Szarnyas:2022:LSN,
  author =       "G{\'a}bor Sz{\'a}rnyas and Jack Waudby and Benjamin A.
                 Steer and D{\'a}vid Szak{\'a}llas and Altan Birler and
                 Mingxi Wu and Yuchen Zhang and Peter Boncz",
  title =        "The {LDBC} Social Network Benchmark: Business
                 Intelligence Workload",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "877--890",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574270",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574270",
  abstract =     "The Social Network Benchmark's Business Intelligence
                 workload (SNB BI) is a comprehensive graph OLAP
                 benchmark targeting analytical data systems capable of
                 supporting graph workloads. This paper marks the
                 finalization of almost a decade of research in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{An:2022:MCM,
  author =       "Shuai An and Yang Cao",
  title =        "Making Cache Monotonic and Consistent",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "891--904",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574271",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574271",
  abstract =     "We propose monotonic consistent caching (MCC), a cache
                 scheme for applications that demand consistency and
                 monotonicity. MCC warrants that a transaction-like
                 request always sees a consistent view of the backend
                 database and observed writes over the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wei:2022:SPE,
  author =       "Ziyun Wei and Immanuel Trummer",
  title =        "{SkinnerMT}: Parallelizing for Efficiency and
                 Robustness in Adaptive Query Processing on Multicore
                 Platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "905--917",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574272",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574272",
  abstract =     "SkinnerMT is an adaptive query processing engine,
                 specialized for multi-core platforms. SkinnerMT
                 features different strategies for parallel processing
                 that allow users to trade between average run time and
                 performance robustness. First, SkinnerMT \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ding:2022:EAQ,
  author =       "Dujian Ding and Sihem Amer-Yahia and Laks Lakshmanan",
  title =        "On Efficient Approximate Queries over Machine Learning
                 Models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "918--931",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574273",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574273",
  abstract =     "The question of answering queries over ML predictions
                 has been gaining attention in the database community.
                 This question is challenging because finding high
                 quality answers by invoking an oracle such as a human
                 expert or an expensive deep neural \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Khatiwada:2022:IDL,
  author =       "Aamod Khatiwada and Roee Shraga and Wolfgang
                 Gatterbauer and Ren{\'e}e J. Miller",
  title =        "Integrating Data Lake Tables",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "932--945",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574274",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574274",
  abstract =     "We have made tremendous strides in providing tools for
                 data scientists to discover new tables useful for their
                 analyses. But despite these advances, the proper
                 integration of discovered tables has been
                 under-explored. An interesting semantics for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kang:2022:PTS,
  author =       "Hongbo Kang and Yiwei Zhao and Guy E. Blelloch and
                 Laxman Dhulipala and Yan Gu and Charles McGuffey and
                 Phillip B. Gibbons",
  title =        "{PIM-Tree}: a Skew-Resistant Index for
                 Processing-in-Memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "4",
  pages =        "946--958",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3574245.3574275",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:39 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3574245.3574275",
  abstract =     "The performance of today's in-memory indexes is
                 bottlenecked by the memory latency/bandwidth wall.
                 Processing-in-memory (PIM) is an emerging approach that
                 potentially mitigates this bottleneck, by enabling
                 low-latency memory access whose aggregate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:ATR,
  author =       "Mingxuan Li and Yazhe Wang and Shuai Ma and Chao Liu
                 and Dongdong Huo and Yu Wang and Zhen Xu",
  title =        "Auto-Tuning with Reinforcement Learning for
                 Permissioned Blockchain Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1000--1012",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579076",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579076",
  abstract =     "In a permissioned blockchain, performance dictates its
                 development, which is substantially influenced by its
                 parameters. However, research on auto-tuning for better
                 performance has somewhat stagnated because of the
                 difficulty posed by distributed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xie:2023:PSH,
  author =       "Minhui Xie and Youyou Lu and Qing Wang and Yangyang
                 Feng and Jiaqiang Liu and Kai Ren and Jiwu Shu",
  title =        "{PetPS}: Supporting Huge Embedding Models with
                 Persistent Memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1013--1022",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579077",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579077",
  abstract =     "Embedding models are effective for learning
                 high-dimensional sparse data. Traditionally, they are
                 deployed in DRAM parameter servers (PS) for online
                 inference access. However, the ever-increasing model
                 capacity makes this practice suffer from both high
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Rabbani:2023:EVS,
  author =       "Kashif Rabbani and Matteo Lissandrini and Katja Hose",
  title =        "Extraction of Validating Shapes from Very Large
                 Knowledge Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1023--1032",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579078",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579078",
  abstract =     "Knowledge Graphs (KGs) represent heterogeneous domain
                 knowledge on the Web and within organizations. There
                 exist shapes constraint languages to define validating
                 shapes to ensure the quality of the data in KGs.
                 Existing techniques to extract validating \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pang:2023:AFM,
  author =       "Pu Pang and Gang Deng and Kaihao Bai and Quan Chen and
                 Shixuan Sun and Bo Liu and Yu Xu and Hongbo Yao and
                 Zhengheng Wang and Xiyu Wang and Zheng Liu and Zhuo
                 Song and Yong Yang and Tao Ma and Minyi Guo",
  title =        "{Async-Fork}: Mitigating Query Latency Spikes Incurred
                 by the Fork-based Snapshot Mechanism from the {OS}
                 Level",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1033--1045",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579079",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579079",
  abstract =     "In-memory key-value stores (IMKVSes) serve many online
                 applications. They generally adopt the fork-based
                 snapshot mechanism to support data backup. However,
                 this method can result in query latency spikes because
                 the engine is out-of-service for queries \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2023:CPJ,
  author =       "Qichen Wang and Xiao Hu and Binyang Dai and Ke Yi",
  title =        "Change Propagation Without Joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1046--1058",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579080",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579080",
  abstract =     "We revisit the classical change propagation framework
                 for query evaluation under updates. The standard
                 framework takes a query plan and materializes the
                 intermediate views, which incurs high polynomial costs
                 in both space and time, with the join \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xie:2023:FFF,
  author =       "Yuexiang Xie and Zhen Wang and Dawei Gao and Daoyuan
                 Chen and Liuyi Yao and Weirui Kuang and Yaliang Li and
                 Bolin Ding and Jingren Zhou",
  title =        "{FederatedScope}: a Flexible Federated Learning
                 Platform for Heterogeneity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1059--1072",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579081",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579081",
  abstract =     "Although remarkable progress has been made by existing
                 federated learning (FL) platforms to provide
                 infrastructures for development, these platforms may
                 not well tackle the challenges brought by various types
                 of heterogeneity. To fill this gap, in this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:AAC,
  author =       "Boyang Li and Yurong Cheng and Ye Yuan and Yi Yang and
                 QianQian Jin and Guoren Wang",
  title =        "{ACTA}: Autonomy and Coordination Task Assignment in
                 Spatial Crowdsourcing Platforms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1073--1085",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579082",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579082",
  abstract =     "Spatial platforms have become increasingly important
                 in people's daily lives. Task assignment is a critical
                 problem in these platforms that matches real-time
                 orders to suitable workers. Most studies only focus on
                 independent platforms that are in a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Um:2023:FAD,
  author =       "Taegeon Um and Byungsoo Oh and Byeongchan Seo and
                 Minhyeok Kweun and Goeun Kim and Woo-Yeon Lee",
  title =        "{FastFlow}: Accelerating Deep Learning Model Training
                 with Smart Offloading of Input Data Pipeline",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1086--1099",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579083",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579083",
  abstract =     "When training a deep learning (DL) model, input data
                 are pre-processed on CPUs and transformed into tensors,
                 which are then fed into GPUs for gradient computations
                 of model training. Expensive GPUs must be fully
                 utilized during training to accelerate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2023:FFM,
  author =       "Xi Zhao and Bolong Zheng and Xiaomeng Yi and Xiaofan
                 Luan and Charles Xie and Xiaofang Zhou and Christian S.
                 Jensen",
  title =        "{FARGO}: Fast Maximum Inner Product Search via Global
                 Multi-Probing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1100--1112",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579084",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579084",
  abstract =     "Maximum inner product search (MIPS) in
                 high-dimensional spaces has wide applications but is
                 computationally expensive due to the curse of
                 dimensionality. Existing studies employ asymmetric
                 transformations that reduce the MIPS problem to a
                 nearest \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kiefer:2023:ODP,
  author =       "Martin Kiefer and Ilias Poulakis and Eleni Tzirita
                 Zacharatou and Volker Markl",
  title =        "Optimistic Data Parallelism for {FPGA}-Accelerated
                 Sketching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1113--1125",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579085",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579085",
  abstract =     "Sketches are a popular approximation technique for
                 large datasets and high-velocity data streams. While
                 custom FPGA-based hardware has shown admirable
                 throughput at sketching, the state-of-the-art exploits
                 data parallelism by fully replicating resources
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Arcolezi:2023:RCM,
  author =       "H{\'e}ber H. Arcolezi and S{\'e}bastien Gambs and
                 Jean-Fran{\c{c}}ois Couchot and Catuscia Palamidessi",
  title =        "On the Risks of Collecting Multidimensional Data Under
                 Local Differential Privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1126--1139",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579086",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579086",
  abstract =     "The private collection of multiple statistics from a
                 population is a fundamental statistical problem. One
                 possible approach to realize this is to rely on the
                 local model of differential privacy (LDP). Numerous LDP
                 protocols have been developed for the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chatzakis:2023:OJL,
  author =       "Manos Chatzakis and Panagiota Fatourou and Eleftherios
                 Kosmas and Themis Palpanas and Botao Peng",
  title =        "{Odyssey}: a Journey in the Land of Distributed Data
                 Series Similarity Search",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1140--1153",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579087",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579087",
  abstract =     "This paper presents Odyssey, a novel distributed
                 data-series processing framework that efficiently
                 addresses the critical challenges of exhibiting good
                 speedup and ensuring high scalability in data series
                 processing by taking advantage of the full \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fang:2023:AER,
  author =       "Lanting Fang and Kaiyu Feng and Jie Gui and Shanshan
                 Feng and Aiqun Hu",
  title =        "Anonymous Edge Representation for Inductive Anomaly
                 Detection in Dynamic Bipartite Graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1154--1167",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579088",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579088",
  abstract =     "The activities in many real-world applications, such
                 as e-commerce and online education, are usually modeled
                 as a dynamic bipartite graph that evolves over time. It
                 is a critical task to detect anomalies inductively in a
                 dynamic bipartite graph. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2023:STR,
  author =       "Junyong Yang and Ming Zhong and Yuanyuan Zhu and
                 Tieyun Qian and Mengchi Liu and Jeffrey Xu Yu",
  title =        "Scalable Time-Range $k$-Core Query on Temporal
                 Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1168--1180",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579089",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579089",
  abstract =     "Querying cohesive subgraphs on temporal graphs with
                 various time constraints has attracted intensive
                 research interests recently. In this paper, we study a
                 novel Temporal $k$-Core Query (TCQ) problem: given a
                 time interval, find all distinct $k$-cores that
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2023:HPR,
  author =       "Erkang Zhu and Silu Huang and Surajit Chaudhuri",
  title =        "High-Performance Row Pattern Recognition Using Joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1181--1195",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579090",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579090",
  abstract =     "The SQL standard introduced MATCH_RECOGNIZE in 2016
                 for row pattern recognition. Since then,
                 MATCH_RECOGNIZE has been supported by several leading
                 relational systems; they implemented this function using
                 Non-Deterministic Finite Automaton (NFA). While NFA
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Luo:2023:HGA,
  author =       "Kelin Luo and Alexandre M. Florio and Syamantak Das
                 and Xiangyu Guo",
  title =        "A Hierarchical Grouping Algorithm for the
                 Multi-Vehicle Dial-a-Ride Problem",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "5",
  pages =        "1195--1207",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3579075.3579091",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Sat Mar 11 08:12:40 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3579075.3579091",
  abstract =     "Ride-sharing is an essential aspect of modern urban
                 mobility. In this paper, we consider a classical
                 problem in ride-sharing --- the Multi-Vehicle
                 Dial-a-Ride Problem (Multi-Vehicle DaRP). Given a fleet
                 of vehicles with a fixed capacity stationed at
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2023:LAD,
  author =       "Xiaoxuan Liu and Shuxian Wang and Mengzhu Sun and
                 Sicheng Pan and Ge Li and Siddharth Jha and Cong Yan
                 and Junwen Yang and Shan Lu and Alvin Cheung",
  title =        "Leveraging Application Data Constraints to Optimize
                 Database-Backed {Web} Applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1208--1221",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583141",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583141",
  abstract =     "Exploiting the relationships among data is a classical
                 query optimization technique. As persistent data is
                 increasingly being created and maintained
                 programmatically, prior work that infers data
                 relationships from data statistics misses an important
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gruber:2023:BCD,
  author =       "Ferdinand Gruber and Maximilian Bandle and Alexis
                 Engelke and Thomas Neumann and Jana Giceva",
  title =        "Bringing Compiling Databases to {RISC} Architectures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1222--1234",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583142",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583142",
  abstract =     "Current hardware development greatly influences the
                 design decisions of modern database systems. For many
                 modern performance-focused database systems, query
                 compilation emerged as an integral part and different
                 approaches for code generation evolved, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cha:2023:BLH,
  author =       "Hokeun Cha and Xiangpeng Hao and Tianzheng Wang and
                 Huanchen Zhang and Aditya Akella and Xiangyao Yu",
  title =        "{B$^{\rm link}$-hash}: an Adaptive Hybrid Index for
                 In-Memory Time-Series Databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1235--1248",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583143",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583143",
  abstract =     "High-speed data ingestion is critical in time-series
                 workloads that are driven by the growth of Internet of
                 Things (IoT) applications. We observe that traditional
                 tree-based indexes encounter severe scalability
                 bottlenecks for time-series workloads that insert
                 monotonically increasing timestamp keys into an index;
                 all insertions go to a small memory region that sees
                 extremely high contention.\par

                 In this work, we present a new index design,
                 Blink-hash, that enhances a tree-based index with hash
                 leaf nodes to mitigate the contention of monotonic
                 insertions --- insertions go to random locations within
                 a hash node (which is much larger than a B+-tree node)
                 to reduce conflicts. We develop further optimizations
                 (median approximation and lazy split) to accelerate
                 hash node splits. We also develop structure adaptation
                 optimizations to dynamically convert a hash node to
                 B+-tree nodes for good scan performance. Our evaluation
                 shows that Blink-hash achieves up to 91.3$ \times $
                 higher throughput than conventional indexes in a
                 time-series workload that monotonically inserts
                 timestamps into an index, while showing comparable scan
                 performance to a well-optimized B+-tree.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2023:DSE,
  author =       "Wentao Huang and Yunhong Ji and Xuan Zhou and
                 Bingsheng He and Kian-Lee Tan",
  title =        "A Design Space Exploration and Evaluation for
                 Main-Memory Hash Joins in Storage Class Memory",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1249--1263",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583144",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583144",
  abstract =     "In this paper, we seek to perform a rigorous
                 experimental study of main-memory hash joins in storage
                 class memory (SCM). In particular, we perform a design
                 space exploration in real SCM for two state-of-the-art
                 join algorithms: partitioned hash join \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2023:EBB,
  author =       "Kaile Huang and Si Liu and Zhenge Chen and Hengfeng
                 Wei and David Basin and Haixiang Li and Anqun Pan",
  title =        "Efficient Black-Box Checking of Snapshot Isolation in
                 Databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1264--1276",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583145",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583145",
  abstract =     "Snapshot isolation (SI) is a prevalent weak isolation
                 level that avoids the performance penalty imposed by
                 serializability and simultaneously prevents various
                 undesired data anomalies. Nevertheless, SI anomalies
                 have recently been found in production \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:DPV,
  author =       "Zitao Li and Tianhao Wang and Ninghui Li",
  title =        "Differentially Private Vertical Federated Clustering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1277--1290",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583146",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583146",
  abstract =     "In many applications, multiple parties have private
                 data regarding the same set of users but on disjoint
                 sets of attributes, and a server wants to leverage the
                 data to train a model. To enable model learning while
                 protecting the privacy of the data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2023:PCT,
  author =       "Fuheng Zhao and Punnal Ismail Khan and Divyakant
                 Agrawal and Amr {El Abbadi} and Arpit Gupta and Zaoxing
                 Liu",
  title =        "{Panakos}: Chasing the Tails for Multidimensional Data
                 Streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1291--1304",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583147",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583147",
  abstract =     "System operators are often interested in extracting
                 different feature streams from multi-dimensional data
                 streams; and reporting their distributions at regular
                 intervals, including the heavy hitters that contribute
                 to the tail portion of the feature \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Furst:2023:VOM,
  author =       "Jonathan F{\"u}rst and Mauricio Fadel Argerich and Bin
                 Cheng",
  title =        "{VersaMatch}: Ontology Matching with Weak
                 Supervision",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1305--1318",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583148",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583148",
  abstract =     "Ontology matching is crucial to data integration for
                 across-silo data sharing and has been mainly addressed
                 with heuristic and machine learning (ML) methods. While
                 heuristic methods are often inflexible and hard to
                 extend to new domains, ML methods rely \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2023:RRT,
  author =       "Yushi Sun and Hao Xin and Lei Chen",
  title =        "{RECA}: Related Tables Enhanced Column Semantic Type
                 Annotation Framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1319--1331",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583149",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583149",
  abstract =     "Understanding the semantics of tabular data is of
                 great importance in various downstream applications,
                 such as schema matching, data cleaning, and data
                 integration. Column semantic type annotation is a
                 critical task in the semantic understanding of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:ZWT,
  author =       "Yiming Li and Yanyan Shen and Lei Chen and Mingxuan
                 Yuan",
  title =        "{Zebra}: When Temporal Graph Neural Networks Meet
                 Temporal Personalized {PageRank}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1332--1345",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583150",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583150",
  abstract =     "Temporal graph neural networks (T-GNNs) are
                 state-of-the-art methods for learning representations
                 over dynamic graphs. Despite the superior performance,
                 T-GNNs still suffer from high computational complexity
                 caused by the tedious recursive temporal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Feng:2023:EAC,
  author =       "Su Feng and Boris Glavic and Oliver Kennedy",
  title =        "Efficient Approximation of Certain and Possible
                 Answers for Ranking and Window Queries over Uncertain
                 Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1346--1358",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583151",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583151",
  abstract =     "Uncertainty arises naturally in many application
                 domains due to, e.g., data entry errors and ambiguity
                 in data cleaning. Prior work in incomplete and
                 probabilistic databases has investigated the semantics
                 and efficient evaluation of ranking and top-k
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yue:2023:GEV,
  author =       "Cong Yue and Tien Tuan Anh Dinh and Zhongle Xie and
                 Meihui Zhang and Gang Chen and Beng Chin Ooi and
                 Xiaokui Xiao",
  title =        "{GlassDB}: an Efficient Verifiable Ledger Database
                 System Through Transparency",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1359--1371",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583152",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583152",
  abstract =     "Verifiable ledger databases protect data history
                 against malicious tampering. Existing systems, such as
                 blockchains and certificate transparency, are based on
                 transparency logs --- a simple abstraction allowing
                 users to verify that a log maintained by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:EDT,
  author =       "Qian Zhang and Jingyao Li and Hongyao Zhao and
                 Quanqing Xu and Wei Lu and Jinliang Xiao and Fusheng
                 Han and Chuanhui Yang and Xiaoyong Du",
  title =        "Efficient Distributed Transaction Processing in
                 Heterogeneous Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1372--1385",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583153",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583153",
  abstract =     "Countrywide and worldwide business, like gaming and
                 social networks, drives the popularity of
                 inter-data-center transactions. To support
                 inter-data-center transaction processing and data
                 center fault tolerance simultaneously, existing
                 protocols suffer \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jiang:2023:ASE,
  author =       "Zhiguo Jiang and Hanhua Chen and Hai Jin",
  title =        "{Auxo}: a Scalable and Efficient Graph Stream
                 Summarization Structure",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1386--1398",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583154",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583154",
  abstract =     "A graph stream refers to a continuous stream of edges,
                 forming a huge and fast-evolving graph. The vast volume
                 and high update speed of a graph stream bring stringent
                 requirements for the data management structure,
                 including sublinear space cost, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{He:2023:OOS,
  author =       "Xiao He and Ye Li and Jian Tan and Bin Wu and Feifei
                 Li",
  title =        "{OneShotSTL}: One-Shot Seasonal-Trend Decomposition
                 For Online Time Series Anomaly Detection And
                 Forecasting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1399--1412",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583155",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583155",
  abstract =     "Seasonal-trend decomposition is one of the most
                 fundamental concepts in time series analysis that
                 supports various downstream tasks, including time
                 series anomaly detection and forecasting. However,
                 existing decomposition methods rely on batch \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{vanRenen:2023:CAB,
  author =       "Alexander van Renen and Viktor Leis",
  title =        "Cloud Analytics Benchmark",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1413--1425",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583156",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583156",
  abstract =     "The cloud facilitates the transition to a
                 service-oriented perspective. This affects cloud-native
                 data management in general, and data analytics in
                 particular. Instead of managing a multi-node database
                 cluster on-premise, end users simply send queries
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Alhomssi:2023:SRS,
  author =       "Adnan Alhomssi and Viktor Leis",
  title =        "Scalable and Robust Snapshot Isolation for
                 High-Performance Storage Engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1426--1438",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583157",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583157",
  abstract =     "MVCC-based snapshot isolation promises that read
                 queries can proceed without interfering with concurrent
                 writes. However, as we show experimentally, in existing
                 implementations a single long-running query can easily
                 cause transactional throughput to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:FFS,
  author =       "Xiang Li and Fabing Li and Mingyu Gao",
  title =        "{Flare}: a Fast, Secure, and Memory-Efficient
                 Distributed Analytics Framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1439--1452",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583158",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583158",
  abstract =     "As big data processing in the cloud becomes prevalent
                 today, data privacy on such public platforms raises
                 critical concerns. Hardware-based trusted execution
                 environments (TEEs) provide promising and practical
                 platforms for low-cost privacy-preserving \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{An:2023:NSB,
  author =       "Mijin An and Jonghyeok Park and Tianzheng Wang and
                 Beomseok Nam and Sang-Won Lee",
  title =        "{NV-SQL}: Boosting {OLTP} Performance with
                 Non-Volatile {DIMMs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1453--1465",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583159",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583159",
  abstract =     "When running OLTP workloads, relational DBMSs with
                 flash SSDs still suffer from the durability overhead.
                 Heavy writes to SSD not only limit the performance but
                 also shorten the storage lifespan. To mitigate the
                 durability overhead, this paper proposes a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2023:LLR,
  author =       "Rong Zhu and Wei Chen and Bolin Ding and Xingguang
                 Chen and Andreas Pfadler and Ziniu Wu and Jingren
                 Zhou",
  title =        "{Lero}: a Learning-to-Rank Query Optimizer",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1466--1479",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583160",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583160",
  abstract =     "A recent line of works apply machine learning
                 techniques to assist or rebuild cost-based query
                 optimizers in DBMS. While exhibiting superiority in
                 some benchmarks, their deficiencies, e.g., unstable
                 performance, high training cost, and slow model
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2023:DCS,
  author =       "Kitaek Lee and Insoon Jo and Jaechan Ahn and Hyuk Lee
                 and Hwang Lee and Woong Sul and Hyungsoo Jung",
  title =        "Deploying Computational Storage for {HTAP DBMSs} Takes
                 More Than Just Computation Offloading",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1480--1493",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583161",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583161",
  abstract =     "Hybrid transactional/analytical processing (HTAP)
                 would overload database systems. To alleviate
                 performance interference between transactions and
                 analytics, recent research pursues the potential of
                 in-storage processing (ISP) using commodity \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tang:2023:TPC,
  author =       "Dixin Tang and Alan Fekete and Indranil Gupta and
                 Aditya G. Parameswaran",
  title =        "Transactional Panorama: a Conceptual Framework for
                 User Perception in Analytical Visual Interfaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1494--1506",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583162",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583162",
  abstract =     "Many tools empower analysts and data scientists to
                 consume analysis results in a visual interface. When
                 the underlying data changes, these results need to be
                 updated, but this update can take a long time---all
                 while the user continues to explore the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Paulsen:2023:SSY,
  author =       "Derek Paulsen and Yash Govind and AnHai Doan",
  title =        "{Sparkly}: a Simple yet Surprisingly Strong {TF\slash
                 IDF} Blocker for Entity Matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1507--1519",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583163",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583163",
  abstract =     "Blocking is a major task in entity matching. Numerous
                 blocking solutions have been developed, but as far as
                 we can tell, blocking using the well-known tf/idf
                 measure has received virtually no attention. Yet, when
                 we experimented with tf/idf blocking \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Negi:2023:RQD,
  author =       "Parimarjan Negi and Ziniu Wu and Andreas Kipf and
                 Nesime Tatbul and Ryan Marcus and Sam Madden and Tim
                 Kraska and Mohammad Alizadeh",
  title =        "Robust Query Driven Cardinality Estimation under
                 Changing Workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1520--1533",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583164",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583164",
  abstract =     "Query driven cardinality estimation models learn from
                 a historical log of queries. They are lightweight,
                 having low storage requirements, fast inference and
                 training, and are easily adaptable for any kind of
                 query. Unfortunately, such models can suffer \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fu:2023:CTR,
  author =       "Han Fu and Chang Liu and Bin Wu and Feifei Li and Jian
                 Tan and Jianling Sun",
  title =        "{CatSQL}: Towards Real World Natural Language to {SQL}
                 Applications",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1534--1547",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583165",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583165",
  abstract =     "Natural language to SQL (NL2SQL) techniques provide a
                 convenient interface to access databases, especially
                 for non-expert users, to conduct various data
                 analytics. Existing methods often employ either a
                 rule-base approach or a deep learning based \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Azizi:2023:EGB,
  author =       "Ilias Azizi and Karima Echihabi and Themis Palpanas",
  title =        "{ELPIS}: Graph-Based Similarity Search for Scalable
                 Data Science",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1548--1559",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583166",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583166",
  abstract =     "The recent popularity of learned embeddings has fueled
                 the growth of massive collections of high-dimensional
                 (high-d) vectors that model complex data. Finding
                 similar vectors in these collections is at the core of
                 many important and practical data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Duffy:2023:DKV,
  author =       "Carl Duffy and Jaehoon Shim and Sang-Hoon Kim and
                 Jin-Soo Kim",
  title =        "{Dotori}: a Key--Value {SSD} Based {KV} Store",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1560--1572",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583167",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583167",
  abstract =     "Key-value SSDs (KVSSDs) represent a major shift in the
                 storage stack design, with numerous potential benefits.
                 Despite this, their lack of native features critical to
                 operation in real world scenarios hinders their
                 adoption, and these benefits go \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pujol:2023:PPG,
  author =       "David Pujol and Amir Gilad and Ashwin
                 Machanavajjhala",
  title =        "{PreFair}: Privately Generating Justifiably Fair
                 Synthetic Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1573--1586",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583168",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583168",
  abstract =     "When a database is protected by Differential Privacy
                 (DP), its usability is limited in scope. In this
                 scenario, generating a synthetic version of the data
                 that mimics the properties of the private data allows
                 users to perform any operation on the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shraga:2023:EDC,
  author =       "Roee Shraga and Ren{\'e}e J. Miller",
  title =        "Explaining Dataset Changes for Semantic Data
                 Versioning with {Explain-Da-V}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "6",
  pages =        "1587--1600",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3583140.3583169",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon May 1 07:43:11 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3583140.3583169",
  abstract =     "In multi-user environments in which data science and
                 analysis is collaborative, multiple versions of the
                 same datasets are generated. While managing and storing
                 data versions has received some attention in the
                 research literature, the semantic nature of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Budiu:2023:DAI,
  author =       "Mihai Budiu and Tej Chajed and Frank McSherry and
                 Leonid Ryzhyk and Val Tannen",
  title =        "{DBSP}: Automatic Incremental View Maintenance for
                 Rich Query Languages",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1601--1614",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587137",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587137",
  abstract =     "Incremental view maintenance (IVM) has long been a
                 central problem in database theory. Many solutions have
                 been proposed for restricted classes of database
                 languages, such as the relational algebra, or Datalog.
                 These techniques do not naturally \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liang:2023:SSP,
  author =       "Ling Liang and Jilan Lin and Zheng Qu and Ishtiyaque
                 Ahmad and Fengbin Tu and Trinabh Gupta and Yufei Ding
                 and Yuan Xie",
  title =        "{SPG}: Structure-Private Graph Database via
                 {SqueezePIR}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1615--1628",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587138",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587138",
  abstract =     "Many relational data in our daily life are represented
                 as graphs, making graph application an important
                 workload. Because of the large scale of graph datasets,
                 moving graph data to the cloud becomes a popular
                 option. To keep the confidential and private \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:IES,
  author =       "Jingyuan Zhang and Ao Wang and Xiaolong Ma and
                 Benjamin Carver and Nicholas John Newman and Ali Anwar
                 and Lukas Rupprecht and Vasily Tarasov and Dimitrios
                 Skourtis and Feng Yan and Yue Cheng",
  title =        "{InfiniStore}: Elastic Serverless Cloud Storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1629--1642",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587139",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587139",
  abstract =     "Cloud object storage such as AWS S3 is cost-effective
                 and highly elastic but relatively slow, while
                 high-performance cloud storage such as AWS ElastiCache
                 is expensive and provides limited elasticity. We
                 present a new cloud storage service called \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fang:2023:DGE,
  author =       "Peng Fang and Arijit Khan and Siqiang Luo and Fang
                 Wang and Dan Feng and Zhenli Li and Wei Yin and Yuchao
                 Cao",
  title =        "Distributed Graph Embedding with Information-Oriented
                 Random Walks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1643--1656",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587140",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587140",
  abstract =     "Graph embedding maps graph nodes to low-dimensional
                 vectors, and is widely adopted in machine learning
                 tasks. The increasing availability of billion-edge
                 graphs underscores the importance of learning efficient
                 and effective embeddings on large graphs, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zheng:2023:SSV,
  author =       "Shuyuan Zheng and Yang Cao and Masatoshi Yoshikawa",
  title =        "Secure {Shapley} Value for Cross-Silo Federated
                 Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1657--1670",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587141",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587141",
  abstract =     "The Shapley value (SV) is a fair and principled metric
                 for contribution evaluation in cross-silo federated
                 learning (cross-silo FL), wherein organizations, i.e.,
                 clients, collaboratively train prediction models with
                 the coordination of a parameter \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:SSF,
  author =       "Xiang Li and Nuozhou Sun and Yunqian Luo and Mingyu
                 Gao",
  title =        "{SODA}: a Set of Fast Oblivious Algorithms in
                 Distributed Secure Data Analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1671--1684",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587142",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587142",
  abstract =     "Cloud systems are now a prevalent platform to host
                 large-scale big-data analytics applications such as
                 machine learning and relational database. However, data
                 privacy remains as a critical concern for public cloud
                 systems. Existing trusted hardware \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hong:2023:GSB,
  author =       "Zicong Hong and Song Guo and Enyuan Zhou and Wuhui
                 Chen and Huawei Huang and Albert Zomaya",
  title =        "{GriDB}: Scaling Blockchain Database via Sharding and
                 Off-Chain Cross-Shard Mechanism",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1685--1698",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587143",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587143",
  abstract =     "Blockchain databases have attracted widespread
                 attention but suffer from poor scalability due to
                 underlying non-scalable blockchains. While blockchain
                 sharding is necessary for a scalable blockchain
                 database, it poses a new challenge named on-chain
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Jian:2023:SAS,
  author =       "Xun Jian and Zhiyuan Li and Lei Chen",
  title =        "{SUFF}: Accelerating Subgraph Matching with Historical
                 Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1699--1711",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587144",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587144",
  abstract =     "Subgraph matching is a fundamental problem in graph
                 theory and has wide applications in areas like
                 sociology, chemistry, and social networks. Due to its
                 NP-hardness, the basic approach is a brute-force search
                 over the whole search space. Some pruning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{He:2023:WDM,
  author =       "Haochen He and Erci Xu and Shanshan Li and Zhouyang
                 Jia and Si Zheng and Yue Yu and Jun Ma and Xiangke
                 Liao",
  title =        "When Database Meets New Storage Devices: Understanding
                 and Exposing Performance Mismatches via
                 Configurations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1712--1725",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587145",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587145",
  abstract =     "NVMe SSD hugely boosts the I/O speed, with up to GB/s
                 throughput and microsecond-level latency.
                 Unfortunately, DBMS users can often find their
                 high-performanced storage devices tend to deliver
                 less-than-expected or even worse performance when
                 compared \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2023:SAD,
  author =       "Grace Fan and Jin Wang and Yuliang Li and Dan Zhang
                 and Ren{\'e}e J. Miller",
  title =        "Semantics-Aware Dataset Discovery from Data Lakes with
                 Contextualized Column-Based Representation Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1726--1739",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587146",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587146",
  abstract =     "Dataset discovery from data lakes is essential in many
                 real application scenarios. In this paper, we propose
                 Starmie, an end-to-end framework for dataset discovery
                 from data lakes (with table union search as the main
                 use case). Our proposed framework \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mortensen:2023:MEM,
  author =       "Kasper Overgaard Mortensen and Fatemeh Zardbani and
                 Mohammad Ahsanul Haque and Steinn Ymir Agustsson and
                 Davide Mottin and Philip Hofmann and Panagiotis
                 Karras",
  title =        "{Marigold}: Efficient $k$-Means Clustering in High
                 Dimensions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1740--1748",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587147",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587147",
  abstract =     "How can we efficiently and scalably cluster
                 high-dimensional data? The k-means algorithm clusters
                 data by iteratively reducing intra-cluster Euclidean
                 distances until convergence. While it finds
                 applications from recommendation engines to image
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sabek:2023:CLM,
  author =       "Ibrahim Sabek and Tim Kraska",
  title =        "The Case for Learned In-Memory Joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1749--1762",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587148",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587148",
  abstract =     "In-memory join is an essential operator in any
                 database engine. It has been extensively investigated
                 in the database literature. In this paper, we study
                 whether exploiting the CDF-based learned models to
                 boost the join performance is practical. To the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:EEB,
  author =       "Ruiyuan Li and Zheng Li and Yi Wu and Chao Chen and Yu
                 Zheng",
  title =        "{Elf}: Erasing-Based Lossless Floating-Point
                 Compression",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1763--1776",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587149",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587149",
  abstract =     "There are a prohibitively large number of
                 floating-point time series data generated at an
                 unprecedentedly high rate. An efficient, compact and
                 lossless compression for time series data is of great
                 importance for a wide range of scenarios. Most existing
                 lossless floating-point compression methods are based
                 on the XOR operation, but they do not fully exploit the
                 trailing zeros, which usually results in an
                 unsatisfactory compression ratio. This paper proposes
                 an Erasing-based Lossless Floating-point compression
                 algorithm, i.e., \pkg{Elf}. The main idea of \pkg{Elf}
                 is to erase the last few bits (i.e., set them to zero)
                 of floating-point values, so the XORed values are
                 supposed to contain many trailing zeros. The challenges
                 of the erasing-based method are three-fold. First, how
                 to quickly determine the erased bits? Second, how to
                 losslessly recover the original data from the erased
                 ones? Third, how to compactly encode the erased data?
                 Through rigorous mathematical analysis, \pkg{Elf} can
                 directly determine the erased bits and restore the
                 original values without losing any precision. To
                 further improve the compression ratio, we propose a
                 novel encoding strategy for the XORed values with many
                 trailing zeros. \pkg{Elf} works in a streaming fashion.
                 It takes only $ O(N) $ (where $N$ is the length of a
                 time series) in time and $ O(1)$ in space, and achieves
                 a notable compression ratio with a theoretical
                 guarantee. Extensive experiments using 22 datasets show
                 the powerful performance of \pkg{Elf} compared with 9
                 advanced competitors.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:LLO,
  author =       "Tianyi Chen and Jun Gao and Hedui Chen and Yaofeng
                 Tu",
  title =        "{LOGER}: a Learned Optimizer Towards Generating
                 Efficient and Robust Query Execution Plans",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1777--1789",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587150",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587150",
  abstract =     "Query optimization based on deep reinforcement
                 learning (DRL) has become a hot research topic
                 recently. Despite the achieved promising progress, DRL
                 optimizers still face great challenges of robustly
                 producing efficient plans, due to the vast search
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Martens:2023:RPG,
  author =       "Wim Martens and Matthias Niewerth and Tina Popp and
                 Carlos Rojas and Stijn Vansummeren and Domagoj Vrgo{\v{c}}",
  title =        "Representing Paths in Graph Database Pattern
                 Matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "7",
  pages =        "1790--1803",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3587136.3587151",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue May 9 09:08:30 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3587136.3587151",
  abstract =     "Modern graph database query languages such as GQL,
                 SQL/PGQ, and their academic predecessor G-Core promote
                 paths to first-class citizens in the sense that their
                 pattern matching facility can return paths, as opposed
                 to only nodes and edges. This is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:ZVE,
  author =       "Xiling Li and Chenkai Weng and Yongxin Xu and Xiao
                 Wang and Jennie Rogers",
  title =        "{ZKSQL}: Verifiable and Efficient Query Evaluation
                 with Zero-Knowledge Proofs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1804--1816",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594513",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594513",
  abstract =     "Individuals and organizations are using databases to
                 store personal information at an unprecedented rate.
                 This creates a quandary for data providers. They are
                 responsible for protecting the privacy of individuals
                 described in their database. On the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Piao:2023:CGE,
  author =       "Chengzhi Piao and Tingyang Xu and Xiangguo Sun and Yu
                 Rong and Kangfei Zhao and Hong Cheng",
  title =        "Computing Graph Edit Distance via Neural Graph
                 Matching",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1817--1829",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594514",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594514",
  abstract =     "Graph edit distance (GED) computation is a fundamental
                 NP-hard problem in graph theory. Given a graph pair
                 (G$_1$, G$_2$), GED is defined as the minimum number of
                 primitive operations converting G$_1$ to G$_2$. Early
                 studies focus on search-based inexact algorithms
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schaler:2023:BUE,
  author =       "Christine Sch{\"a}ler and Thomas H{\"u}tter and Martin
                 Sch{\"a}ler",
  title =        "Benchmarking the Utility of $w$-Event Differential
                 Privacy Mechanisms --- When Baselines Become Mighty
                 Competitors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1830--1842",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594515",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594515",
  abstract =     "The $w$-event framework is the current standard for
                 ensuring differential privacy on continuously monitored
                 data streams. Following the proposition of $w$-event
                 differential privacy, various mechanisms to implement
                 the framework are proposed. Their \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Augustine:2023:CGA,
  author =       "Eriq Augustine and Lise Getoor",
  title =        "Collective Grounding: Applying Database Techniques to
                 Grounding Templated Models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1843--1855",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594516",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594516",
  abstract =     "The process of instantiating, or ``grounding'', a
                 first-order model is a fundamental component of
                 reasoning in logic. It has been widely studied in the
                 context of theorem proving, database theory, and
                 artificial intelligence. Within the relational
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Adams:2023:EEP,
  author =       "Jan Niklas Adams and Cameron Pitsch and Tobias
                 Brockhoff and Wil M. P. van der Aalst",
  title =        "An Experimental Evaluation of Process Concept Drift
                 Detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1856--1869",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594517",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594517",
  abstract =     "Process mining provides techniques to learn models
                 from event data. These models can be descriptive (e.g.,
                 Petri nets) or predictive (e.g., neural networks). The
                 learned models offer operational support to process
                 owners by conformance checking, process \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Vitagliano:2023:PDL,
  author =       "Gerardo Vitagliano and Mazhar Hameed and Lan Jiang and
                 Lucas Reisener and Eugene Wu and Felix Naumann",
  title =        "{Pollock}: a Data Loading Benchmark",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1870--1882",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594518",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594518",
  abstract =     "Any system at play in a data-driven project has a
                 fundamental requirement: the ability to load data. The
                 de-facto standard format to distribute and consume raw
                 data is csv. Yet, the plain text and flexible nature of
                 this format make such files often \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xiao:2023:APL,
  author =       "Yingtai Xiao and Guanhong Wang and Danfeng Zhang and
                 Daniel Kifer",
  title =        "Answering Private Linear Queries Adaptively Using the
                 Common Mechanism",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1883--1896",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594519",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594519",
  abstract =     "When analyzing confidential data through a privacy
                 filter, a data scientist often needs to decide which
                 queries will best support their intended analysis. For
                 example, an analyst may wish to study noisy two-way
                 marginals in a dataset produced by a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Du:2023:LLD,
  author =       "Yuntao Du and Yujia Hu and Zhikun Zhang and Ziquan
                 Fang and Lu Chen and Baihua Zheng and Yunjun Gao",
  title =        "{LDPTrace}: Locally Differentially Private Trajectory
                 Synthesis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1897--1909",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594520",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594520",
  abstract =     "Trajectory data has the potential to greatly benefit a
                 wide range of real-world applications, such as tracking
                 the spread of the disease through people's movement
                 patterns and providing personalized location-based
                 services based on travel preference. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Kitsios:2023:SPH,
  author =       "Xenophon Kitsios and Panagiotis Liakos and Katia
                 Papakonstantinopoulou and Yannis Kotidis",
  title =        "{Sim-Piece}: Highly Accurate Piecewise Linear
                 Approximation through Similar Segment Merging",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1910--1922",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594521",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594521",
  abstract =     "Approximating series of timestamped data points using
                 a sequence of line segments with a maximum error
                 guarantee is a fundamental data compression problem,
                 termed as piecewise linear approximation (PLA). Due to
                 the increasing need to analyze massive \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Marinelli:2023:TMF,
  author =       "Eugenio Marinelli and Yiqing Yan and Virginie Magnone
                 and Charlotte Dumargne and Pascal Barbry and Thomas
                 Heinis and Raja Appuswamy",
  title =        "Towards Migration-Free {``Just-in-Case''} Data
                 Archival for Future Cloud Data Lakes Using Synthetic
                 {DNA}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1923--1929",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594522",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594522",
  abstract =     "Given the growing adoption of AI, cloud data lakes are
                 facing the need to support cost-effective
                 ``just-in-case'' data archival over long time periods
                 to meet regulatory compliance requirements.
                 Unfortunately, current media technologies suffer from
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Dong:2023:FGR,
  author =       "Zhiyuan Dong and Zhaoguo Wang and Xiaodong Zhang and
                 Xian Xu and Changgeng Zhao and Haibo Chen and Aurojit
                 Panda and Jinyang Li",
  title =        "Fine-Grained Re-Execution for Efficient Batched Commit
                 of Distributed Transactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1930--1943",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594523",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594523",
  abstract =     "Distributed transaction systems incur extensive
                 cross-node communication to execute and commit
                 serializable OLTP transactions. As a result, their
                 performance greatly suffers. Caching data at nodes that
                 execute transactions can cut down remote reads.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fan:2023:LDT,
  author =       "Wenfei Fan and Resul Tugay and Yaoshu Wang and Min Xie
                 and Muhammad Asif Ali",
  title =        "Learning and Deducing Temporal Orders",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1944--1957",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594524",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594524",
  abstract =     "This paper studies how to determine temporal orders on
                 attribute values in a set of tuples that pertain to the
                 same entity, in the absence of complete timestamps. We
                 propose a creator-critic framework to learn and deduce
                 temporal orders by combining \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:BBG,
  author =       "Xu Chen and Zhen Wang and Shuncheng Liu and Yaliang Li
                 and Kai Zeng and Bolin Ding and Jingren Zhou and Han Su
                 and Kai Zheng",
  title =        "{BASE}: Bridging the Gap between Cost and Latency for
                 Query Optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1958--1966",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594525",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594525",
  abstract =     "Some recent works have shown the advantages of
                 reinforcement learning (RL) based learned query
                 optimizers. These works often use the cost (i.e., the
                 estimation of cost model) or the latency (i.e.,
                 execution time) as guidance signals for training their
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lemiesz:2023:EFO,
  author =       "Jakub Lemiesz",
  title =        "Efficient Framework for Operating on Data Sketches",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1967--1978",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594526",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594526",
  abstract =     "We study the problem of analyzing massive data streams
                 based on concise data sketches. Recently, a number of
                 papers have investigated how to estimate the results of
                 set-theory operations based on sketches. In this paper
                 we present a framework that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2023:TEI,
  author =       "Xi Zhao and Yao Tian and Kai Huang and Bolong Zheng
                 and Xiaofang Zhou",
  title =        "Towards Efficient Index Construction and Approximate
                 Nearest Neighbor Search in High-Dimensional Spaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1979--1991",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594527",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594527",
  abstract =     "The approximate nearest neighbor (ANN) search in
                 high-dimensional spaces is a fundamental but
                 computationally very expensive problem. Many methods
                 have been designed for solving the ANN problem, such as
                 LSH-based methods and graph-based methods. The
                 LSH-based \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2023:LIC,
  author =       "Zhaoyan Sun and Xuanhe Zhou and Guoliang Li",
  title =        "Learned Index: a Comprehensive Experimental
                 Evaluation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "1992--2004",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594528",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594528",
  abstract =     "Indexes can improve query-processing performance by
                 avoiding full table scans. Although traditional indexes
                 (e.g., B+-tree) have been widely used, learned indexes
                 are proposed to adopt machine learning models to reduce
                 the query latency and index size. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:LIG,
  author =       "Yanping Zhang and Johes Bater and Kartik Nayak and
                 Ashwin Machanavajjhala",
  title =        "{Longshot}: Indexing Growing Databases Using {MPC} and
                 Differential Privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "2005--2018",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594529",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594529",
  abstract =     "In this work, we propose Longshot, a novel design for
                 secure outsourced database systems that supports ad-hoc
                 queries through the use of secure multi-party
                 computation and differential privacy. By combining
                 these two techniques, we build and maintain \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Paparrizos:2023:ASS,
  author =       "John Paparrizos and Kaize Wu and Aaron Elmore and
                 Christos Faloutsos and Michael J. Franklin",
  title =        "Accelerating Similarity Search for Elastic Measures: a
                 Study and New Generalization of Lower Bounding
                 Distances",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "2019--2032",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594530",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594530",
  abstract =     "Similarity search is a core analytical task, and its
                 performance critically depends on the choice of
                 distance measure. For time-series querying, elastic
                 measures achieve state-of-the-art accuracy but are
                 computationally expensive. Thus, fast lower \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2023:ALA,
  author =       "Chenyuan Wu and Bhavana Mehta and Mohammad Javad Amiri
                 and Ryan Marcus and Boon Thau Loo",
  title =        "{AdaChain}: a Learned Adaptive Blockchain",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "2033--2046",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594531",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594531",
  abstract =     "This paper presents AdaChain, a learning-based
                 blockchain framework that adaptively chooses the best
                 permissioned blockchain architecture to optimize
                 effective throughput for dynamic transaction workloads.
                 AdaChain addresses the challenge in Blockchain
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2023:ICS,
  author =       "Yingli Zhou and Yixiang Fang and Wensheng Luo and
                 Yunming Ye",
  title =        "Influential Community Search over Large Heterogeneous
                 Information Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "8",
  pages =        "2047--2060",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3594512.3594532",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Jun 23 11:11:42 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3594512.3594532",
  abstract =     "Recently, the topic of influential community search
                 has gained much attention. Given a graph, it aims to
                 find communities of vertices with high importance
                 values from it. Existing works mainly focus on
                 conventional homogeneous networks, where vertices
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Arafat:2023:NBH,
  author =       "Naheed Anjum Arafat and Arijit Khan and Arpit Kumar
                 Rai and Bishwamittra Ghosh",
  title =        "Neighborhood-Based Hypergraph Core Decomposition",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2061--2074",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598582",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598582",
  abstract =     "We propose neighborhood-based core decomposition: a
                 novel way of decomposing hypergraphs into hierarchical
                 neighborhood-cohesive subhypergraphs. Alternative
                 approaches to decomposing hypergraphs, e.g., reduction
                 to clique or bipartite graphs, are not \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Layne:2023:TSG,
  author =       "Janet Layne and Justin Carpenter and Edoardo Serra and
                 Francesco Gullo",
  title =        "Temporal {SIR-GN}: Efficient and Effective Structural
                 Representation Learning for Temporal Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2075--2089",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598583",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598583",
  abstract =     "Node representation learning (NRL) generates numerical
                 vectors (embeddings) for the nodes of a graph.
                 Structural NRL specifically assigns similar node
                 embeddings for those nodes that exhibit similar
                 structural roles. This is in contrast with its
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Haas:2023:WMN,
  author =       "Gabriel Haas and Viktor Leis",
  title =        "What Modern {NVMe} Storage Can Do, and How to Exploit
                 it: High-Performance {I/O} for High-Performance Storage
                 Engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2090--2102",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598584",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598584",
  abstract =     "NVMe SSDs based on flash are cheap and offer high
                 throughput. Combining several of these devices into a
                 single server enables 10 million I/O operations per
                 second or more. Our experiments show that existing
                 out-of-memory database systems and storage engines
                 achieve only a fraction of this performance. In this
                 work, we demonstrate that it is possible to close the
                 performance gap between hardware and software through
                 an I/O optimized storage engine design. In a heavy
                 out-of-memory setting, where the dataset is 10 times
                 larger than main memory, our system can achieve more
                 than 1 million TPC-C transactions per second.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Banakar:2023:WES,
  author =       "Vinay Banakar and Kan Wu and Yuvraj Patel and Kimberly
                 Keeton and Andrea C. Arpaci-Dusseau and Remzi H.
                 Arpaci-Dusseau",
  title =        "{WiscSort}: External Sorting for Byte-Addressable
                 Storage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2103--2116",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598585",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598585",
  abstract =     "We present WiscSort, a new approach to
                 high-performance concurrent sorting for existing and
                 future byte-addressable storage (BAS) devices. WiscSort
                 carefully reduces writes, exploits random reads by
                 splitting keys and values during sorting, and performs
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ayad:2023:TIL,
  author =       "Lorraine A. K. Ayad and Grigorios Loukides and Solon
                 P. Pissis",
  title =        "Text Indexing for Long Patterns: Anchors are All you
                 Need",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2117--2131",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598586",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598586",
  abstract =     "In many real-world database systems, a large fraction
                 of the data is represented by strings: sequences of
                 letters over some alphabet. This is because strings can
                 easily encode data arising from different sources. It
                 is often crucial to represent such \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Afroozeh:2023:FCL,
  author =       "Azim Afroozeh and Peter Boncz",
  title =        "The {FastLanes} Compression Layout: Decoding $> 100$
                 Billion Integers per Second with Scalar Code",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2132--2144",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598587",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598587",
  abstract =     "The open-source FastLanes project aims to improve big
                 data formats, such as Parquet, ORC and columnar
                 database formats, in multiple ways. In this paper, we
                 significantly accelerate decoding of all common
                 Light-Weight Compression (LWC) schemes: DICT, FOR,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2145--2157, DOI 10.14778/3598581.3598588.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Yue:2023:VAP,
  author =       "Cong Yue and Meihui Zhang and Changhao Zhu and Gang
                 Chen and Dumitrel Loghin and Beng Chin Ooi",
  title =        "{VeriBench}: Analyzing the Performance of Database
                 Systems with Verifiability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2145--2157",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598588",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598588",
  abstract =     "Database systems are paying more attention to data
                 security in recent years. Immutable systems such as
                 blockchains, verifiable databases, and ledger databases
                 are equipped with various verifiability mechanisms to
                 protect data. Such systems often adopt \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2158--2171, DOI 10.14778/3598581.3598589.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Li:2023:TDL,
  author =       "Jiangneng Li and Zheng Wang and Gao Cong and Cheng
                 Long and Han Mao Kiah and Bin Cui",
  title =        "Towards Designing and Learning Piecewise Space-Filling
                 Curves",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2158--2171",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598589",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598589",
  abstract =     "To index multi-dimensional data, space-filling curves
                 (SFCs) have been used to map the data to one dimension,
                 and then a one-dimensional indexing method such as the
                 B-tree is used to index the mapped data. The existing
                 SFCs all adopt a single mapping \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2172--2185, DOI 10.14778/3598581.3598590.
%%% Fix: removed a stray "." after "multi-" in the abstract -- the field
%%% holds the truncated ACM DL snippet (hence the trailing "\ldots{}"),
%%% and the extraction left "improves multi-." where the sentence was
%%% cut mid-word.
@Article{Zhu:2023:MQB,
  author =       "Xiaoke Zhu and Yang Liu and Shuhao Liu and Wenfei
                 Fan",
  title =        "{MiniGraph}: Querying Big Graphs with a Single
                 Machine",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2172--2185",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598590",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598590",
  abstract =     "This paper presents MiniGraph, an out-of-core system
                 for querying big graphs with a single machine. As
                 opposed to previous single-machine graph systems,
                 MiniGraph proposes a pipelined architecture to overlap
                 I/O and CPU operations, and improves multi- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2186--2198, DOI 10.14778/3598581.3598591.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Choi:2023:BEC,
  author =       "Yunyoung Choi and Kunsoo Park and Hyunjoon Kim",
  title =        "{BICE}: Exploring Compact Search Space by Using
                 Bipartite Matching and Cell-Wide Verification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2186--2198",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598591",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598591",
  abstract =     "Subgraph matching is the problem of searching for all
                 embeddings of a query graph in a data graph, and
                 subgraph query processing (also known as subgraph
                 search) is to find all the data graphs that contain a
                 query graph as subgraphs. Extensive research \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2199--2211, DOI 10.14778/3598581.3598592.
%%% "{D}-Truss" braces only the letter D so styles keep it uppercase.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Tian:2023:MDT,
  author =       "Anxin Tian and Alexander Zhou and Yue Wang and Lei
                 Chen",
  title =        "Maximal {D}-Truss Search in Dynamic Directed Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2199--2211",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598592",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598592",
  abstract =     "Community search (CS) aims at personalized subgraph
                 discovery which is the key to understanding the
                 organisation of many real-world networks. CS in
                 undirected networks has attracted significant attention
                 from researchers, including many solutions for
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2212--2224, DOI 10.14778/3598581.3598593.
%%% Lowercase "a" after the braced system name follows this file's
%%% convention (cf. {SDPipe}: a ..., {LEON}: a New ...).
@Article{Li:2023:DDD,
  author =       "Pengfei Li and Hua Lu and Rong Zhu and Bolin Ding and
                 Long Yang and Gang Pan",
  title =        "{DILI}: a Distribution-Driven Learned Index",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2212--2224",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598593",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598593",
  abstract =     "Targeting in-memory one-dimensional search keys, we
                 propose a novel DIstribution-driven Learned Index tree
                 (DILI), where a concise and computation-efficient
                 linear regression model is used for each node. An
                 internal node's key range is equally divided \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2225--2238, DOI 10.14778/3598581.3598594.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Zeakis:2023:PTE,
  author =       "Alexandros Zeakis and George Papadakis and Dimitrios
                 Skoutas and Manolis Koubarakis",
  title =        "Pre-Trained Embeddings for Entity Resolution: an
                 Experimental Analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2225--2238",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598594",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598594",
  abstract =     "Many recent works on Entity Resolution (ER) leverage
                 Deep Learning techniques involving language models to
                 improve effectiveness. This is applied to both main
                 steps of ER, i.e., blocking and matching. Several
                 pre-trained embeddings have been tested, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2239--2247, DOI 10.14778/3598581.3598595.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Zheng:2023:DGN,
  author =       "Yanping Zheng and Zhewei Wei and Jiajun Liu",
  title =        "Decoupled Graph Neural Networks for Large Dynamic
                 Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2239--2247",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598595",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598595",
  abstract =     "Real-world graphs, such as social networks, financial
                 transactions, and recommendation systems, often
                 demonstrate dynamic behavior. This phenomenon, known as
                 graph stream, involves the dynamic changes of nodes and
                 the emergence and disappearance of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2248--2260, DOI 10.14778/3598581.3598596.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Zardbani:2023:AIO,
  author =       "Fatemeh Zardbani and Nikos Mamoulis and Stratos Idreos
                 and Panagiotis Karras",
  title =        "Adaptive Indexing of Objects with Spatial Extent",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2248--2260",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598596",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598596",
  abstract =     "Can we quickly explore large multidimensional data in
                 main memory? Adaptive indexing responds to this need by
                 building an index incrementally, in response to
                 queries; in its default form, it indexes a single
                 attribute or, in the presence of several \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2261--2273, DOI 10.14778/3598581.3598597.
%%% Both the system name {LEON} and the acronym {ML} are brace-protected
%%% against style downcasing. Abstract is the truncated ACM DL snippet.
@Article{Chen:2023:LNF,
  author =       "Xu Chen and Haitian Chen and Zibo Liang and Shuncheng
                 Liu and Jinghong Wang and Kai Zeng and Han Su and Kai
                 Zheng",
  title =        "{LEON}: a New Framework for {ML}-Aided Query
                 Optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2261--2273",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598597",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598597",
  abstract =     "Query optimization has long been a fundamental yet
                 challenging topic in the database field. With the
                 prosperity of machine learning (ML), some recent works
                 have shown the advantages of reinforcement learning
                 (RL) based learned query optimizer. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2274--2288, DOI 10.14778/3598581.3598598.
%%% Accented names use BibTeX special characters ({\'e}, {\c{c}}) so
%%% classic BibTeX sorts and labels them correctly.
@Article{Faria:2023:TIT,
  author =       "Nuno Faria and Jos{\'e} Pereira and Ana Nunes Alonso
                 and Ricardo Vila{\c{c}}a and Yunus Koning and Niels
                 Nes",
  title =        "{TiQuE}: Improving the Transactional Performance of
                 Analytical Systems for True Hybrid Workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2274--2288",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598598",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598598",
  abstract =     "Transactions have been a key issue in database
                 management for a long time and there are a plethora of
                 architectures and algorithms to support and implement
                 them. The current state-of-the-art is focused on
                 storage management and is tightly coupled with
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2289--2301, DOI 10.14778/3598581.3598599.
%%% Fix: brace the system name "{Seiden}" in the title so sentence-casing
%%% BibTeX styles do not downcase it to "seiden"; this matches every
%%% sibling entry in this file ({FastLanes}, {VeriBench}, {MiniGraph},
%%% {BICE}, {DILI}, {Pando}, ...).
@Article{Bang:2023:SRQ,
  author =       "Jaeho Bang and Gaurav Tarlok Kakkar and Pramod
                 Chunduri and Subrata Mitra and Joy Arulraj",
  title =        "{Seiden}: Revisiting Query Processing in Video
                 Database Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2289--2301",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598599",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598599",
  abstract =     "State-of-the-art video database management systems
                 (VDBMSs) often use lightweight proxy models to
                 accelerate object retrieval and aggregate queries. The
                 key assumption underlying these systems is that the
                 proxy model is an order of magnitude faster than
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2302--2315, DOI 10.14778/3598581.3598600.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Kossmann:2023:ETL,
  author =       "Ferdi Kossmann and Ziniu Wu and Eugenie Lai and Nesime
                 Tatbul and Lei Cao and Tim Kraska and Sam Madden",
  title =        "Extract-Transform-Load for Video Streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2302--2315",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598600",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598600",
  abstract =     "Social media, self-driving cars, and traffic cameras
                 produce video streams at large scales and cheap cost.
                 However, storing and querying video at such scales is
                 prohibitively expensive. We propose to treat
                 large-scale video analytics as a data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2316--2329, DOI 10.14778/3598581.3598601.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Sudhir:2023:PED,
  author =       "Sivaprasad Sudhir and Wenbo Tao and Nikolay Laptev and
                 Cyrille Habis and Michael Cafarella and Samuel Madden",
  title =        "{Pando}: Enhanced Data Skipping with Logical Data
                 Partitioning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2316--2329",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598601",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598601",
  abstract =     "With enormous volumes of data, quickly retrieving data
                 that is relevant to a query is essential for achieving
                 high performance. Modern cloud-based database systems
                 often partition the data into blocks and employ various
                 techniques to skip irrelevant \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2330--2343, DOI 10.14778/3598581.3598602.
%%% The accented surname uses the BibTeX special character {\'e}.
@Article{Maliszewski:2023:CLJ,
  author =       "Kajetan Maliszewski and Jorge-Arnulfo Quian{\'e}-Ruiz
                 and Volker Markl",
  title =        "Cracking-Like Join for Trusted Execution
                 Environments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2330--2343",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598602",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598602",
  abstract =     "Data processing on non-trusted infrastructures, such
                 as the public cloud, has become increasingly popular,
                 despite posing risks to data privacy. However, the
                 existing cloud DBMSs either lack sufficient privacy
                 guarantees or underperform. In this paper, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2344--2353, DOI 10.14778/3598581.3598603.
%%% The cedilla in the first author's surname uses the BibTeX special
%%% character {\c{C}}. Abstract is the truncated ACM DL snippet.
@Article{Calikyilmaz:2023:OQA,
  author =       "Umut {\c{C}}alikyilmaz and Sven Groppe and Jinghua
                 Groppe and Tobias Winker and Stefan Prestel and Farida
                 Shagieva and Daanish Arya and Florian Preis and Le
                 Gruenwald",
  title =        "Opportunities for Quantum Acceleration of Databases:
                 Optimization of Queries and Transaction Schedules",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2344--2353",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598603",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598603",
  abstract =     "The capabilities of quantum computers, such as the
                 number of supported qubits and maximum circuit depth,
                 have grown exponentially in recent years. Commercially
                 relevant applications that take advantage of quantum
                 computing are expected to be available \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2354--2363, DOI 10.14778/3598581.3598604.
%%% NOTE(review): the mixed casing "Pipeline-parallel" looks like it may
%%% differ from the published title -- confirm against the ACM DL page.
@Article{Miao:2023:SSD,
  author =       "Xupeng Miao and Yining Shi and Zhi Yang and Bin Cui
                 and Zhihao Jia",
  title =        "{SDPipe}: a Semi-Decentralized Framework for
                 Heterogeneity-Aware Pipeline-parallel Training",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2354--2363",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598604",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598604",
  abstract =     "The increasing size of both deep learning models and
                 training data necessitates the ability to scale out
                 model training through pipeline-parallel training,
                 which combines pipelined model parallelism and data
                 parallelism. However, most of them assume an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2364--2376, DOI 10.14778/3598581.3598605.
%%% {LRU-C}, {I/Os}, and {SSDs} are brace-protected against downcasing.
@Article{Lee:2023:LCP,
  author =       "Bohyun Lee and Mijin An and Sang-Won Lee",
  title =        "{LRU-C}: Parallelizing Database {I/Os} for Flash
                 {SSDs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2364--2376",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598605",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598605",
  abstract =     "The conventional database buffer managers have two
                 inherent sources of I/O serialization: read stall and
                 mutex conflict. The serialized I/O makes storage and
                 CPU under-utilized, limiting transaction throughput and
                 latency. Such harm stands out on flash \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(9), May 2023, pp. 2377--2390, DOI 10.14778/3598581.3598606.
%%% The math-mode "Top-$k$" in the title is braced implicitly by $...$;
%%% abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Chen:2023:WYF,
  author =       "Zixuan Chen and Panagiotis Manolios and Mirek
                 Riedewald",
  title =        "Why Not Yet: Fixing a Top-$k$ Ranking that is Not Fair
                 to Individuals",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "9",
  pages =        "2377--2390",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3598581.3598606",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3598581.3598606",
  abstract =     "This work considers why-not questions in the context
                 of top-k queries and score-based ranking functions.
                 Following the popular linear scalarization approach for
                 multi-objective optimization, we study rankings based
                 on the weighted sum of multiple \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(10), June 2023, pp. 2391--2403, DOI 10.14778/3603581.3603582.
%%% First issue-10 entry in this chunk: note number, month, bibdate, and
%%% DOI prefix (3603581) all change relative to the issue-9 entries.
@Article{Sharma:2023:ITS,
  author =       "Shantanu Sharma and Yin Li and Sharad Mehrotra and
                 Nisha Panwar and Komal Kumari and Swagnik
                 Roychoudhury",
  title =        "Information-Theoretically Secure and Highly Efficient
                 Search and Row Retrieval",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2391--2403",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603582",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603582",
  abstract =     "Information-theoretic or unconditional security
                 provides the highest level of security --- independent
                 of the computational capability of an adversary.
                 Secret-sharing techniques achieve information-theoretic
                 security by splitting a secret into multiple \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(10), June 2023, pp. 2404--2417, DOI 10.14778/3603581.3603583.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Kato:2023:OOF,
  author =       "Fumiyuki Kato and Yang Cao and Masatoshi Yoshikawa",
  title =        "{Olive}: Oblivious Federated Learning on Trusted
                 Execution Environment against the Risk of
                 Sparsification",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2404--2417",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603583",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603583",
  abstract =     "Combining Federated Learning (FL) with a Trusted
                 Execution Environment (TEE) is a promising approach for
                 realizing privacy-preserving FL, which has garnered
                 significant academic attention in recent years.
                 Implementing the TEE on the server side enables
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(10), June 2023, pp. 2418--2430, DOI 10.14778/3603581.3603584.
%%% NOTE(review): the braced system name is entered as "{Task}"; confirm
%%% the published capitalization (possibly "TASK") against the ACM DL.
@Article{Luo:2023:TEF,
  author =       "Chengyang Luo and Qing Liu and Yunjun Gao and Lu Chen
                 and Ziheng Wei and Congcong Ge",
  title =        "{Task}: an Efficient Framework for Instant
                 Error-Tolerant Spatial Keyword Queries on Road
                 Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2418--2430",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603584",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603584",
  abstract =     "Instant spatial keyword queries return the results as
                 soon as users type in some characters instead of a
                 complete keyword, which allow users to query the
                 geo-textual data in a type-as-you-search manner.
                 However, the existing methods of instant spatial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(10), June 2023, pp. 2431--2443, DOI 10.14778/3603581.3603585.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Kassaie:2023:ACI,
  author =       "Besat Kassaie and Frank Wm. Tompa",
  title =        "Autonomously Computable Information Extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2431--2443",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603585",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603585",
  abstract =     "Most optimization techniques deployed in information
                 extraction systems assume that source documents are
                 static. Instead, extracted relations can be considered
                 to be materialized views defined by a language built on
                 regular expressions. Using this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(10), June 2023, pp. 2444--2457, DOI 10.14778/3603581.3603586.
%%% {NVM} is brace-protected against style downcasing.
@Article{Koutsoukos:2023:NIV,
  author =       "Dimitrios Koutsoukos and Raghav Bhartia and Michal
                 Friedman and Ana Klimovic and Gustavo Alonso",
  title =        "{NVM}: Is it Not Very Meaningful for Databases?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2444--2457",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603586",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603586",
  abstract =     "Persistent or Non Volatile Memory (PMEM) offers
                 expanded memory capacity and faster access to
                 persistent storage. However, there is no comprehensive
                 empirical analysis of existing database engines under
                 different PMEM modes, to understand how databases
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% PVLDB 16(10), June 2023, pp. 2458--2470, DOI 10.14778/3603581.3603587.
%%% Abstract is the truncated ACM DL snippet (ends "\ldots{}").
@Article{Dong:2023:DJT,
  author =       "Yuyang Dong and Chuan Xiao and Takuma Nozawa and
                 Masafumi Enomoto and Masafumi Oyamada",
  title =        "{DeepJoin}: Joinable Table Discovery with Pre-Trained
                 Language Models",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2458--2470",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603587",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603587",
  abstract =     "Due to the usefulness in data enrichment for data
                 analysis tasks, joinable table discovery has become an
                 important operation in data lake management. Existing
                 approaches target equi-joins, the most common way of
                 combining tables for creating a unified \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2471--2484 (June 2023); DOI 10.14778/3603581.3603588.
@Article{Wu:2023:FPP,
  author =       "Yuncheng Wu and Naili Xing and Gang Chen and Tien Tuan
                 Anh Dinh and Zhaojing Luo and Beng Chin Ooi and Xiaokui
                 Xiao and Meihui Zhang",
  title =        "{Falcon}: a Privacy-Preserving and Interpretable
                 Vertical Federated Learning System",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2471--2484",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603588",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603588",
  abstract =     "Federated learning (FL) enables multiple data owners
                 to collaboratively train machine learning (ML) models
                 without disclosing their raw data. In the vertical
                 federated learning (VFL) setting, the collaborating
                 parties have data from the same set of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2485--2498 (June 2023); DOI 10.14778/3603581.3603589.
@Article{Gao:2023:ESE,
  author =       "Haotian Gao and Cong Yue and Tien Tuan Anh Dinh and
                 Zhiyong Huang and Beng Chin Ooi",
  title =        "Enabling Secure and Efficient Data Analytics Pipeline
                 Evolution with Trusted Execution Environment",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2485--2498",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603589",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603589",
  abstract =     "Modern data analytics pipelines are highly dynamic, as
                 they are constantly monitored and fine-tuned by both
                 data engineers and scientists. Recent systems managing
                 pipelines ease creating, deploying, and tracking their
                 evolution. However, privacy \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2499--2511 (June 2023); DOI 10.14778/3603581.3603590.
@Article{Doraiswamy:2023:CGD,
  author =       "Harish Doraiswamy and Vikas Kalagi and Karthik
                 Ramachandra and Jayant R. Haritsa",
  title =        "A Case for Graphics-Driven Query Processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2499--2511",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603590",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603590",
  abstract =     "Over the past decade, the database research community
                 has directed considerable attention towards harnessing
                 the power of GPUs in query processing engines. The
                 proposed techniques have primarily focused on devising
                 customized low-level mechanisms that \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2512--2524 (June 2023); DOI 10.14778/3603581.3603591.
@Article{Tian:2023:EER,
  author =       "Wei Tian and Jieming Shi and Siqiang Luo and Hui Li
                 and Xike Xie and Yuanhang Zou",
  title =        "Effective and Efficient Route Planning Using
                 Historical Trajectories on Road Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2512--2524",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603591",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603591",
  abstract =     "We study route planning that utilizes historical
                 trajectories to predict a realistic route from a source
                 to a destination on a road network at given departure
                 time. Route planning is a fundamental task in many
                 location-based services. It is challenging \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2525--2537 (June 2023); DOI 10.14778/3603581.3603592.
@Article{Lampropoulos:2023:AIH,
  author =       "Konstantinos Lampropoulos and Fatemeh Zardbani and
                 Nikos Mamoulis and Panagiotis Karras",
  title =        "Adaptive Indexing in High-Dimensional Metric Spaces",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2525--2537",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603592",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603592",
  abstract =     "Similarity search in high-dimensional metric spaces is
                 routinely used in many applications including
                 content-based image retrieval, bioinformatics, data
                 mining, and recommender systems. Search can be
                 accelerated by the use of an index. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2538--2550 (June 2023); DOI 10.14778/3603581.3603593.
%% Title math is brace-protected ({$h$}) per this file's convention for
%% math in titles; stray spaces around "h" in the ACM-extracted abstract
%% ("h -star", "h -clique") are repaired.
@Article{Gao:2023:PCS,
  author =       "Sen Gao and Hongchao Qin and Rong-Hua Li and Bingsheng
                 He",
  title =        "Parallel Colorful {$h$}-Star Core Maintenance in
                 Dynamic Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2538--2550",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603593",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603593",
  abstract =     "The higher-order structure cohesive subgraph mining is
                 an important operator in many graph analysis tasks.
                 Recently, the colorful h-star core model has been
                 proposed as an effective alternative to h-clique based
                 cohesive subgraph models, in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2551--2564 (June 2023); DOI 10.14778/3603581.3603594.
@Article{Li:2023:MFM,
  author =       "Jia Li and Wenyue Zhao and Nikos Ntarmos and Yang Cao
                 and Peter Buneman",
  title =        "{MITra}: a Framework for Multi-Instance Graph
                 Traversal",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2551--2564",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603594",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603594",
  abstract =     "This paper presents MITra, a framework for composing
                 multi-instance graph algorithms that traverse from
                 multiple source vertices simultaneously over a single
                 thread. Underlying MITra is a model of multi-instance
                 traversal that uniformly captures \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2565--2577 (June 2023); DOI 10.14778/3603581.3603595.
@Article{Chen:2023:CEB,
  author =       "Jiazun Chen and Yikuan Xia and Jun Gao",
  title =        "{CommunityAF}: an Example-Based Community Search
                 Method via Autoregressive Flow",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2565--2577",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603595",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603595",
  abstract =     "Example-based community search utilizes hidden
                 patterns of given examples rather than explicit rules,
                 reducing users' burden and enhancing flexibility.
                 However, existing works face challenges such as low
                 scalability, high training cost, and improper
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2578--2590 (June 2023); DOI 10.14778/3603581.3603596.
@Article{Lin:2023:ABA,
  author =       "Yiming Lin and Yeye He and Surajit Chaudhuri",
  title =        "{Auto-BI}: Automatically Build {BI}-Models Leveraging
                 Local Join Prediction and Global Schema Graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2578--2590",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603596",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603596",
  abstract =     "Business Intelligence (BI) is crucial in modern
                 enterprises and billion-dollar business. Traditionally,
                 technical experts like database administrators would
                 manually prepare BI-models (e.g., in star or snowflake
                 schemas) that join tables in data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2591--2604 (June 2023); DOI 10.14778/3603581.3603597.
@Article{Zhang:2023:TDC,
  author =       "Yuemin Zhang and Qingqing Ye and Rui Chen and Haibo Hu
                 and Qilong Han",
  title =        "Trajectory Data Collection with Local Differential
                 Privacy",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2591--2604",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603597",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603597",
  abstract =     "Trajectory data collection is a common task with many
                 applications in our daily lives. Analyzing trajectory
                 data enables service providers to enhance their
                 services, which ultimately benefits users. However,
                 directly collecting trajectory data may give \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2605--2617 (June 2023); DOI 10.14778/3603581.3603598.
@Article{Gao:2023:LNM,
  author =       "Jian Gao and Xin Cao and Xin Yao and Gong Zhang and
                 Wei Wang",
  title =        "{LMSFC}: a Novel Multidimensional Index Based on
                 Learned Monotonic Space Filling Curves",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2605--2617",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603598",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603598",
  abstract =     "The recently proposed learned indexes have attracted
                 much attention as they can adapt to the actual data and
                 query distributions to attain better search efficiency.
                 Based on this technique, several existing works build
                 up indexes for multi-dimensional \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2618--2631 (June 2023); DOI 10.14778/3603581.3603599.
@Article{Rong:2023:SDC,
  author =       "Kexin Rong and Mihai Budiu and Athinagoras
                 Skiadopoulos and Lalith Suresh and Amy Tai",
  title =        "Scaling a Declarative Cluster Manager Architecture
                 with Query Optimization Techniques",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2618--2631",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603599",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603599",
  abstract =     "Cluster managers play a crucial role in data centers
                 by distributing workloads among infrastructure
                 resources. Declarative Cluster Management (DCM) is a
                 new cluster management architecture that enables users
                 to express placement policies declaratively \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2632--2644 (June 2023); DOI 10.14778/3603581.3603600.
%% Full (untruncated) abstract, unlike most entries in this batch.
@Article{Singh:2023:CLT,
  author =       "Mukul Singh and Jos{\'e} Cambronero S{\'a}nchez and
                 Sumit Gulwani and Vu Le and Carina Negreanu and
                 Mohammad Raza and Gust Verbruggen",
  title =        "{Cornet}: Learning Table Formatting Rules By Example",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2632--2644",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603600",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603600",
  abstract =     "Spreadsheets are widely used for table manipulation
                 and presentation. Stylistic formatting of these tables
                 is an important property for presentation and analysis.
                 As a result, popular spreadsheet software, such as
                 Excel, supports automatically formatting tables based
                 on rules. Unfortunately, writing such formatting rules
                 can be challenging for users as it requires knowledge
                 of the underlying rule language and data logic. We
                 present Cornet, a system that tackles the novel problem
                 of automatically learning such formatting rules from
                 user-provided formatted cells. Cornet takes inspiration
                 from advances in inductive programming and combines
                 symbolic rule enumeration with a neural ranker to learn
                 conditional formatting rules. To motivate and evaluate
                 our approach, we extracted tables with over 450K unique
                 formatting rules from a corpus of over 1.8M real
                 worksheets. Since we are the first to introduce the
                 task of automatically learning conditional formatting
                 rules, we compare Cornet to a wide range of symbolic
                 and neural baselines adapted from related domains. Our
                 results show that Cornet accurately learns rules across
                 varying setups. Additionally, we show that in some
                 cases Cornet can find rules that are shorter than those
                 written by users and can also discover rules in
                 spreadsheets that users have manually formatted.
                 Furthermore, we present two case studies investigating
                 the generality of our approach by extending Cornet to
                 related data tasks (e.g., filtering) and generalizing
                 to conditional formatting over multiple columns.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2645--2658 (June 2023); DOI 10.14778/3603581.3603601.
@Article{Zuo:2023:AAR,
  author =       "Chaoji Zuo and Dong Deng",
  title =        "{ARKGraph}: All-Range Approximate
                 {$K$}-Nearest-Neighbor Graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2645--2658",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603601",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603601",
  abstract =     "Given a collection of vectors, the approximate
                 K-nearest-neighbor graph (KGraph for short) connects
                 every vector to its approximate K-nearest-neighbors
                 (KNN for short). KGraph plays an important role in high
                 dimensional data visualization, semantic \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2659--2665 (June 2023); DOI 10.14778/3603581.3603602.
@Article{Youngmann:2023:CDI,
  author =       "Brit Youngmann and Michael Cafarella and Babak Salimi
                 and Anna Zeng",
  title =        "Causal Data Integration",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2659--2665",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603602",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603602",
  abstract =     "Causal inference is fundamental to empirical
                 scientific discoveries in natural and social sciences;
                 however, in the process of conducting causal inference,
                 data management problems can lead to false discoveries.
                 Two such problems are (i) not having all \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2666--2678 (June 2023); DOI 10.14778/3603581.3603603.
@Article{Martini:2023:MFI,
  author =       "Michael Martini and Daniel Schuster and Wil M. P. van
                 der Aalst",
  title =        "Mining Frequent Infix Patterns from Concurrency-Aware
                 Process Execution Variants",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2666--2678",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603603",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603603",
  abstract =     "Event logs, as considered in process mining, document
                 a large number of individual process executions.
                 Moreover, each process execution consists of various
                 executed activities. To cope with the vast amount of
                 process executions in event logs, the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(10):2679--2685 (June 2023); DOI 10.14778/3603581.3603604.
@Article{Pedreira:2023:CDM,
  author =       "Pedro Pedreira and Orri Erling and Konstantinos
                 Karanasos and Scott Schneider and Wes McKinney and
                 Satya R. Valluri and Mohamed Zait and Jacques Nadeau",
  title =        "The Composable Data Management System Manifesto",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "10",
  pages =        "2679--2685",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3603581.3603604",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Aug 9 10:33:02 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3603581.3603604",
  abstract =     "The requirement for specialization in data management
                 systems has evolved faster than our software
                 development practices. After decades of organic growth,
                 this situation has created a siloed landscape composed
                 of hundreds of products developed and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(11):2686--2698 (July 2023); DOI 10.14778/3611479.3611480.
%% Empty ajournal and missing ISSN-L filled to match the journal-wide
%% values used by every volume 16 number 10 entry above.
@Article{Schmitt:2023:TLS,
  author =       "Daniel Schmitt and Daniel Kocher and Nikolaus Augsten
                 and Willi Mann and Alexander Miller",
  title =        "A Two-Level Signature Scheme for Stable Set Similarity
                 Joins",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2686--2698",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611480",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611480",
  abstract =     "We study the set similarity join problem, which
                 retrieves all pairs of similar sets from two
                 collections of sets for a given distance function.
                 Existing exact solutions employ a signature-based
                 filter-verification framework: If two sets are similar,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(11):2699--2713 (July 2023); DOI 10.14778/3611479.3611481.
%% Empty ajournal and missing ISSN-L filled to match the journal-wide
%% values used by every volume 16 number 10 entry above.
@Article{Rodriguez:2023:SRD,
  author =       "Olivier Rodriguez and Federico Ulliana and Marie-Laure
                 Mugnier",
  title =        "Scalable Reasoning on Document Stores via
                 Instance-Aware Query Rewriting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2699--2713",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611481",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611481",
  abstract =     "Data trees, typically encoded in JSON, are ubiquitous
                 in data-driven applications. This ubiquity makes urgent
                 the development of novel techniques for querying
                 heterogeneous JSON data in a flexible manner. We
                 propose a rule language for JSON, called \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(11):2714--2727 (July 2023); DOI 10.14778/3611479.3611482.
%% Empty ajournal and missing ISSN-L filled to match the journal-wide
%% values used by every volume 16 number 10 entry above.
@Article{Zhang:2023:EVS,
  author =       "Enhao Zhang and Maureen Daum and Dong He and Brandon
                 Haynes and Ranjay Krishna and Magdalena Balazinska",
  title =        "{EQUI-VOCAL}: Synthesizing Queries for Compositional
                 Video Events from Limited User Interactions",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2714--2727",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611482",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611482",
  abstract =     "We introduce EQUI-VOCAL: a new system that
                 automatically synthesizes queries over videos from
                 limited user interactions. The user only provides a
                 handful of positive and negative examples of what they
                 are looking for. EQUI-VOCAL utilizes these initial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(11):2728--2741 (July 2023); DOI 10.14778/3611479.3611483.
%% Empty ajournal and missing ISSN-L filled to match the journal-wide
%% values used by every volume 16 number 10 entry above.
@Article{Zhang:2023:LBG,
  author =       "Yuhao Zhang and Arun Kumar",
  title =        "{Lotan}: Bridging the Gap between {GNNs} and Scalable
                 Graph Analytics Engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2728--2741",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611483",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611483",
  abstract =     "Recent advances in Graph Neural Networks (GNNs) have
                 changed the landscape of modern graph analytics. The
                 complexity of GNN training and the scalability
                 challenges have also sparked interest from the systems
                 community, with efforts to build systems that
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(11):2742--2754 (July 2023); DOI 10.14778/3611479.3611484.
%% Empty ajournal and missing ISSN-L filled to match the journal-wide
%% values used by every volume 16 number 10 entry above.
@Article{Kraft:2023:EAT,
  author =       "Peter Kraft and Qian Li and Xinjing Zhou and Peter
                 Bailis and Michael Stonebraker and Matei Zaharia and
                 Xiangyao Yu",
  title =        "{Epoxy}: {ACID} Transactions across Diverse Data
                 Stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2742--2754",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611484",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611484",
  abstract =     "Developers are increasingly building applications that
                 incorporate multiple data stores, for example to manage
                 heterogeneous data. Often, these require transactional
                 safety for operations across stores, but few systems
                 support such guarantees. To solve \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% Proc. VLDB Endowment 16(11):2755--2768 (July 2023); DOI 10.14778/3611479.3611485.
%% Empty ajournal and missing ISSN-L filled to match the journal-wide
%% values used by every volume 16 number 10 entry above.
@Article{Bother:2023:AVH,
  author =       "Maximilian B{\"o}ther and Lawrence Benson and Ana
                 Klimovic and Tilmann Rabl",
  title =        "Analyzing Vectorized Hash Tables across {CPU}
                 Architectures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2755--2768",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611485",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611485",
  abstract =     "Data processing systems often leverage vector
                 instructions to achieve higher performance. When
                 applying vector instructions, an often overlooked data
                 structure is the hash table, even though it is
                 fundamental in data processing systems for operations
                 such as indexing, aggregating, and joining. In this
                 paper, we characterize and evaluate three fundamental
                 vectorized hashing schemes, vectorized linear probing
                 (VLP), vectorized fingerprinting (VFP), and
                 bucket-based comparison (BBC). We implement these
                 hashing schemes on the x86, ARM, and Power CPU
                 architectures, as modern database systems must provide
                 efficient implementations for multiple platforms due to
                 the continuously increasing hardware heterogeneity. We
                 present various implementation variants and
                 platform-specific optimizations, which we evaluate for
                 integer keys, string keys, large payloads, skewed
                 distributions, and multiple threads. Our extensive
                 evaluation and comparison to three scalar hashing
                 schemes on four servers shows that BBC outperforms
                 scalar linear probing by a factor of more than 2x,
                 while also scaling well to high load factors. We find
                 that vectorized hashing schemes come with caveats that
                 need to be considered, such as the increased
                 engineering overhead, differences between CPUs, and
                 differences between vector ISAs, such as AVX and
                 AVX-512, which impact performance. We conclude with key
                 findings for vectorized hashing scheme
                 implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Durner:2023:ECO,
  author =       "Dominik Durner and Viktor Leis and Thomas Neumann",
  title =        "Exploiting Cloud Object Storage for High-Performance
                 Analytics",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2769--2782",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611486",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611486",
  abstract =     "Elasticity of compute and storage is crucial for
                 analytical cloud database systems. All cloud vendors
                 provide disaggregated object stores, which can be used
                 as storage backend for analytical query engines. Until
                 recently, local storage was unavoidable \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Karapiperis:2023:RBS,
  author =       "Dimitrios Karapiperis and Christos Tjortjis and
                 Vassilios S. Verykios",
  title =        "A Randomized Blocking Structure for Streaming Record
                 Linkage",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2783--2791",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611487",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611487",
  abstract =     "A huge amount of data, in terms of streams, are
                 collected nowadays via a variety of sources, such as
                 sensors, mobile devices, or even raw log files. The
                 unprecedented rate at which these data are generated
                 and collected calls for novel record linkage \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Riveros:2023:RNR,
  author =       "Cristian Riveros and Nicol{\'a}s {Van Sint Jan} and
                 Domagoj Vrgoc",
  title =        "{REmatch}: a Novel Regex Engine for Finding All
                 Matches",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2792--2804",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611488",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611488",
  abstract =     "In this paper, we present the REmatch system for
                 information extraction. REmatch is based on a recently
                 proposed enumeration algorithm for evaluating regular
                 expressions with capture variables supporting the
                 all-match semantics. It tells a story of what it takes
                 to make a theoretically optimal algorithm work in
                 practice. As we show here, a naive implementation of
                 the original algorithm would have a hard time dealing
                 with realistic workloads. We thus develop a new
                 algorithm and a series of optimizations that make
                 REmatch as fast or faster than many popular RegEx
                 engines while at the same time being able to return all
                 the outputs: a task that most other engines tend to
                 struggle with.",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2023:AAO,
  author =       "Junxiong Wang and Immanuel Trummer and Ahmet Kara and
                 Dan Olteanu",
  title =        "{ADOPT}: Adaptively Optimizing Attribute Orders for
                 Worst-Case Optimal Join Algorithms via Reinforcement
                 Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2805--2817",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611489",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611489",
  abstract =     "The performance of worst-case optimal join algorithms
                 depends on the order in which the join attributes are
                 processed. Selecting good orders before query execution
                 is hard, due to the large space of possible orders and
                 unreliable execution cost \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hu:2023:TSM,
  author =       "Zheng Hu and Weiguo Zheng and Xiang Lian",
  title =        "Triangular Stability Maximization by Influence Spread
                 over Social Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2818--2831",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611490",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611490",
  abstract =     "In many real-world applications such as social network
                 analysis and online advertising/marketing, one of the
                 most important and popular problems is called influence
                 maximization (IM), which finds a set of k seed users
                 that maximize the expected number \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Guan:2023:CSE,
  author =       "Haoquan Guan and Ziling Chen and Shaoxu Song",
  title =        "{CORE-Sketch}: On Exact Computation of Median Absolute
                 Deviation with Limited Space",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2832--2844",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611491",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611491",
  abstract =     "Median absolute deviation (MAD), the median of the
                 absolute deviations from the median, has been found
                 useful in various applications such as outlier
                 detection. Together with median, MAD is more robust to
                 abnormal data than mean and standard deviation
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lulf:2023:FSC,
  author =       "Christian L{\"u}lf and Denis Mayr Lima Martins and
                 Marcos Antonio Vaz Salles and Yongluan Zhou and Fabian
                 Gieseke",
  title =        "Fast Search-by-Classification for Large-Scale
                 Databases Using Index-Aware Decision Trees and Random
                 Forests",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2845--2857",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611492",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611492",
  abstract =     "The vast amounts of data collected in various domains
                 pose great challenges to modern data exploration and
                 analysis. To find ``interesting'' objects in large
                 databases, users typically define a query using
                 positive and negative example objects and train a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Calautti:2023:SOC,
  author =       "Marco Calautti and Mostafa Milani and Andreas Pieris",
  title =        "Semi-Oblivious Chase Termination for Linear
                  Existential Rules: an Experimental Study",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2858--2870",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611493",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611493",
  abstract =     "The chase procedure is a fundamental algorithmic tool
                 in databases that allows us to reason with constraints,
                 such as existential rules, with a plethora of
                 applications. It takes as input a database and a set of
                 constraints, and iteratively completes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2023:AIC,
  author =       "Kukjin Lee and Anshuman Dutt and Vivek Narasayya and
                 Surajit Chaudhuri",
  title =        "Analyzing the Impact of Cardinality Estimation on
                 Execution Plans in {Microsoft SQL} Server",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2871--2883",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611494",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611494",
  abstract =     "Cardinality estimation is widely believed to be one of
                 the most important causes of poor query plans. Prior
                 studies evaluate the impact of cardinality estimation
                 on plan quality on a set of Select-Project-Join queries
                 on PostgreSQL DBMS. Our empirical \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lee:2023:WLZ,
  author =       "Jongsung Lee and Donguk Kim and Jae W. Lee",
  title =        "{WALTZ}: Leveraging Zone Append to Tighten the Tail
                 Latency of {LSM} Tree on {ZNS SSD}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2884--2896",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611495",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611495",
  abstract =     "We propose WALTZ, an LSM tree-based key-value store on
                 the emerging Zoned Namespace (ZNS) SSD. The key
                 contribution of WALTZ is to leverage the zone append
                 command, which is a recent addition to ZNS SSD
                 specifications, to provide tight tail latency. The
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Russo:2023:AAQ,
  author =       "Matthew Russo and Tatsunori Hashimoto and Daniel Kang
                 and Yi Sun and Matei Zaharia",
  title =        "Accelerating Aggregation Queries on Unstructured
                 Streams of Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2897--2910",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611496",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611496",
  abstract =     "Analysts and scientists are interested in querying
                 streams of video, audio, and text to extract
                 quantitative insights. For example, an urban planner
                 may wish to measure congestion by querying the live
                 feed from a traffic camera. Prior work has used deep
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Bai:2023:QIS,
  author =       "Qiushi Bai and Sadeem Alsudais and Chen Li",
  title =        "{QueryBooster}: Improving {SQL} Performance Using
                 Middleware Services for Human-Centered Query
                 Rewriting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2911--2924",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611497",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611497",
  abstract =     "SQL query performance is critical in database
                 applications, and query rewriting is a technique that
                 transforms an original query into an equivalent query
                 with a better performance. In a wide range of
                 database-supported systems, there is a unique problem
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhu:2023:CRA,
  author =       "Jiongli Zhu and Sainyam Galhotra and Nazanin Sabri and
                 Babak Salimi",
  title =        "Consistent Range Approximation for Fair Predictive
                 Modeling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2925--2938",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611498",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611498",
  abstract =     "This paper proposes a novel framework for certifying
                 the fairness of predictive models trained on biased
                 data. It draws from query answering for incomplete and
                 inconsistent databases to formulate the problem of
                 consistent range approximation (CRA) of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yin:2023:SMW,
  author =       "Haoteng Yin and Muhan Zhang and Jianguo Wang and Pan
                 Li",
  title =        "{SUREL+}: Moving from Walks to Sets for Scalable
                 Subgraph-Based Graph Representation Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2939--2948",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611499",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611499",
  abstract =     "Subgraph-based graph representation learning (SGRL)
                 has recently emerged as a powerful tool in many
                 prediction tasks on graphs due to its advantages in
                 model expressiveness and generalization ability. Most
                 previous SGRL models face computational issues
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2023:ESN,
  author =       "Hanzhi Wang and Zhewei Wei",
  title =        "Estimating Single-Node {PageRank} in {$ \tilde
                  {O}(\min (d_t, \sqrt {m})) $} Time",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2949--2961",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611500",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611500",
  abstract =     "PageRank is a famous measure of graph centrality that
                 has numerous applications in practice. The problem of
                 computing a single node's PageRank has been the subject
                 of extensive research over a decade. However, existing
                 methods still incur large time complexities despite
                 years of efforts. Even on undirected graphs where
                 several valuable properties held by PageRank scores,
                 the problem of locally approximating the PageRank score
                 of a target node remains a challenging task. Two
                 commonly adopted techniques, Monte-Carlo based random
                 walks and backward push, both cost $O(n)$ time in the
                 worst-case scenario, which hinders existing methods
                 from achieving a sublinear time complexity like
                 $O(\sqrt{m})$ on an undirected graph with $n$ nodes and
                 $m$ edges.\par

                 In this paper, we focus on the problem of single-node
                 PageRank computation on undirected graphs. We propose a
                 novel algorithm, SetPush, for estimating single-node
                 PageRank specifically on undirected graphs. With
                 non-trivial analysis, we prove that our SetPush
                  achieves the $\tilde{O}(\min(d_t, \sqrt{m}))$ time
                 complexity for estimating the target node $t$'s
                 PageRank with constant relative error and constant
                 failure probability on undirected graphs.  We conduct
                 comprehensive experiments to demonstrate the
                 effectiveness of SetPush.",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:SAQ,
  author =       "Yunjia Zhang and Yannis Chronis and Jignesh M. Patel
                 and Theodoros Rekatsinas",
  title =        "Simple Adaptive Query Processing vs. Learned Query
                 Optimizers: Observations and Analysis",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2962--2975",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611501",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611501",
  abstract =     "There have been many decades of work on optimizing
                 query processing in database management systems.
                 Recently, modern machine learning (ML), and
                 specifically reinforcement learning (RL), has gained
                 increased attention as a means to develop a query
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xu:2023:BTO,
  author =       "Helen Xu and Amanda Li and Brian Wheatman and Manoj
                 Marneni and Prashant Pandey",
  title =        "{BP-Tree}: Overcoming the Point-Range Operation
                 Tradeoff for In-Memory {B}-Trees",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2976--2989",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611502",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611502",
  abstract =     "B-trees are the go-to data structure for in-memory
                 indexes in databases and storage systems. B-trees
                 support both point operations (i.e., inserts and finds)
                 and range operations (i.e., iterators and maps).
                 However, there is an inherent tradeoff between \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lv:2023:HXT,
  author =       "Ge Lv and Chen Jason Zhang and Lei Chen",
  title =        "{HENCE-X}: Toward Heterogeneity-Agnostic Multi-Level
                 Explainability for Deep Graph Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "2990--3003",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611503",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611503",
  abstract =     "Deep graph networks (DGNs) have demonstrated their
                 outstanding effectiveness on both heterogeneous and
                  homogeneous graphs. However, their black-box nature does
                 not allow human users to understand their working
                 mechanisms. Recently, extensive efforts have \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yuan:2023:ARE,
  author =       "Haitao Yuan and Sai Wang and Zhifeng Bao and
                 Shangguang Wang",
  title =        "Automatic Road Extraction with Multi-Source Data
                 Revisited: Completeness, Smoothness and
                 Discrimination",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "3004--3017",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611504",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611504",
  abstract =     "Extracting roads from multi-source data, such as
                 aerial images and vehicle trajectories, is an important
                  way to maintain road networks in the field of urban
                 computing. In this paper, we revisit the problem of
                 road extraction and aim to boost its \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Fent:2023:ABQ,
  author =       "Philipp Fent and Guido Moerkotte and Thomas Neumann",
  title =        "Asymptotically Better Query Optimization Using Indexed
                 Algebra",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "3018--3030",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611505",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611505",
  abstract =     "Query optimization is essential for the efficient
                 execution of queries. The necessary analysis, if we can
                 and should apply optimizations and transform the query
                 plan, is already challenging. Traditional techniques
                 focus on the availability of columns at \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Skavantzos:2023:NPG,
  author =       "Philipp Skavantzos and Sebastian Link",
  title =        "Normalizing Property Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "3031--3043",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611506",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611506",
  abstract =     "Normalization aims at minimizing sources of potential
                 data inconsistency and costs of update maintenance
                 incurred by data redundancy. For relational databases,
                 different classes of dependencies cause data redundancy
                 and have resulted in proposals such \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2023:DDC,
  author =       "Chunwei Liu and Anna Pavlenko and Matteo Interlandi
                 and Brandon Haynes",
  title =        "A Deep Dive into Common Open Formats for Analytical
                 {DBMSs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "3044--3056",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611507",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611507",
  abstract =     "This paper evaluates the suitability of Apache Arrow,
                 Parquet, and ORC as formats for subsumption in an
                 analytical DBMS. We systematically identify and explore
                 the high-level features that are important to support
                 efficient querying in modern OLAP DBMSs \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2023:SDP,
  author =       "Zezhou Huang and Jiaxiang Liu and Daniel Gbenga Alabi
                 and Raul Castro Fernandez and Eugene Wu",
  title =        "{Saibot}: a Differentially Private Data Search
                 Platform",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "3057--3070",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611508",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611508",
  abstract =     "Recent data search platforms use ML task-based utility
                 measures rather than metadata-based keywords, to search
                 large dataset corpora. Requesters submit a training
                 dataset, and these platforms search for augmentations
                  ---join or union-compatible datasets---. \ldots{}
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2023:JGT,
  author =       "Zezhou Huang and Rathijit Sen and Jiaxiang Liu and
                 Eugene Wu",
  title =        "{JoinBoost}: Grow Trees over Normalized Data Using
                 Only {SQL}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "11",
  pages =        "3071--3084",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611479.3611509",
  ISSN =         "2150-8097",
  bibdate =      "Fri Aug 25 07:25:43 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611479.3611509",
  abstract =     "Although dominant for tabular data, ML libraries that
                 train tree models over normalized databases (e.g.,
                 LightGBM, XGBoost) require the data to be denormalized
                 as a single table, materialized, and exported. This
                 process is not scalable, slow, and poses \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Tatemura:2023:PPP,
  author =       "Junichi Tatemura and Tao Zou and Jagan
                 Sankaranarayanan and Yanlai Huang and Jim Chen and Yupu
                 Zhang and Kevin Lai and Hao Zhang and Gokul Nath Babu
                 Manoharan and Goetz Graefe and Divyakant Agrawal and
                 Brad Adelberg and Shilpa Kolhar and Indrajit Roy",
  title =        "Progressive Partitioning for Parallelized Query
                 Execution in {Google}'s {Napa}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3475--3487",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611541",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611541",
  abstract =     "Napa holds Google's critical data warehouses in
                 log-structured merge trees for real-time data ingestion
                 and sub-second response for billions of queries per
                 day. These \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Depoutovitch:2023:TMB,
  author =       "Alex Depoutovitch and Chong Chen and Per-Ake Larson
                 and Jack Ng and Shu Lin and Guanzhu Xiong and Paul Lee
                 and Emad Boctor and Samiao Ren and Lengdong Wu and
                 Yuchen Zhang and Calvin Sun",
  title =        "{Taurus MM}: Bringing Multi-Master to the Cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3488--3500",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611542",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611542",
  abstract =     "A single-master database has limited update capacity
                 because a single node handles all updates. A
                 multi-master database potentially has higher update
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Mao:2023:SCN,
  author =       "Yancan Mao and Zhanghao Chen and Yifan Zhang and Meng
                 Wang and Yong Fang and Guanghui Zhang and Rui Shi and
                 Richard T. B. Ma",
  title =        "{StreamOps}: Cloud-Native Runtime Management for
                 Streaming Services in {ByteDance}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3501--3514",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611543",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611543",
  abstract =     "Stream processing is widely used for real-time data
                 processing and decision-making, leading to tens of
                 thousands of streaming jobs deployed in ByteDance
                 cloud. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Anneser:2023:ALQ,
  author =       "Christoph Anneser and Nesime Tatbul and David Cohen
                 and Zhenggang Xu and Prithviraj Pandian and Nikolay
                 Laptev and Ryan Marcus",
  title =        "{AutoSteer}: Learned Query Optimization for Any {SQL}
                 Database",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3515--3527",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611544",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611544",
  abstract =     "This paper presents AutoSteer, a learning-based
                 solution that automatically drives query optimization
                 in any SQL database that exposes tunable optimizer
                 knobs. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:KRT,
  author =       "Jianjun Chen and Rui Shi and Heng Chen and Li Zhang
                 and Ruidong Li and Wei Ding and Liya Fan and Hao Wang
                 and Mu Xiong and Yuxiang Chen and Benchao Dong and
                 Kuankuan Guo and Yuanjin Lin and Xiao Liu and Haiyang
                 Shi and Peipei Wang and Zikang Wang and Yemeng Yang and
                 Junda Zhao and Dongyan Zhou and Zhikai Zuo and Yuming
                 Liang",
  title =        "{Krypton}: Real-Time Serving and Analytical {SQL}
                 Engine at {ByteDance}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3528--3542",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611545",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611545",
  abstract =     "In recent years, at ByteDance, we have started seeing
                 more and more business scenarios that require
                 performing real-time data serving besides complex Ad
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zou:2023:EVE,
  author =       "Yuanhang Zou and Zhihao Ding and Jieming Shi and
                 Shuting Guo and Chunchen Su and Yafei Zhang",
  title =        "{EmbedX}: a Versatile, Efficient and Scalable Platform
                 to Embed Both Graphs and High-Dimensional Sparse Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3543--3556",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611546",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611546",
  abstract =     "In modern online services, it is of growing importance
                 to process web-scale graph data and high-dimensional
                 sparse data together into embeddings for downstream
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Saxena:2023:SAG,
  author =       "Mohit Saxena and Benjamin Sowell and Daiyan Alamgir
                 and Nitin Bahadur and Bijay Bisht and Santosh
                 Chandrachood and Chitti Keswani and G. Krishnamoorthy
                 and Austin Lee and Bohou Li and Zach Mitchell and
                 Vaibhav Porwal and Maheedhar Reddy Chappidi and Brian
                 Ross and Noritaka Sekiyama and Omer Zaki and Linchi
                 Zhang and Mehul A. Shah",
  title =        "The Story of {AWS Glue}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3557--3569",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611547",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611547",
  abstract =     "AWS Glue is Amazon's serverless data integration cloud
                 service that makes it simple and cost effective to
                 extract, clean, enrich, load, and organize data.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:TGE,
  author =       "Yang Li and Huaijun Jiang and Yu Shen and Yide Fang
                 and Xiaofeng Yang and Danqing Huang and Xinyi Zhang and
                 Wentao Zhang and Ce Zhang and Peng Chen and Bin Cui",
  title =        "Towards General and Efficient Online Tuning for
                 {Spark}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3570--3583",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611548",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611548",
  abstract =     "The distributed data analytic system --- Spark is a
                 common choice for processing massive volumes of
                 heterogeneous data, while it is challenging to tune its
                 parameters \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:CBP,
  author =       "Jiashu Zhang and Wen Jiang and Bo Tang and Haoxiang Ma
                 and Lixun Cao and Zhongbin Jiang and Yuanyuan Nie and
                 Fan Wang and Lei Zhang and Yuming Liang",
  title =        "{CDSBen}: Benchmarking the Performance of Storage
                 Services in Cloud-Native Database System at
                 {ByteDance}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3584--3596",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611549",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611549",
  abstract =     "In this work, we focus on the performance benchmarking
                 problem of storage services in cloud-native database
                 systems, which are widely used in various cloud
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhou:2023:FBR,
  author =       "Xuanhe Zhou and Cheng Chen and Kunyi Li and Bingsheng
                 He and Mian Lu and Qiaosheng Liu and Wei Huang and
                 Guoliang Li and Zhao Zheng and Yuqiang Chen",
  title =        "{FEBench}: a Benchmark for Real-Time Relational Data
                 Feature Extraction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3597--3609",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611550",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611550",
  abstract =     "As the use of online AI inference services rapidly
                 expands in various applications (e.g., fraud detection
                 in banking, product recommendation in e-commerce),
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xiao:2023:MDF,
  author =       "Fei Xiao and Yuncheng Wu and Meihui Zhang and Gang
                 Chen and Beng Chin Ooi",
  title =        "{MINT}: Detecting Fraudulent Behaviors from
                 Time-Series Relational Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3610--3623",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611551",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611551",
  abstract =     "The e-commerce platforms, such as Shopee, have
                 accumulated a huge volume of time-series relational
                 data, which contains useful information on
                 differentiating \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ahmad:2023:MPS,
  author =       "Shafi Ahmad and Dillidorai Arumugam and Srdan Bozovic
                 and Elnata Degefa and Sailesh Duvvuri and Steven Gott
                 and Nitish Gupta and Joachim Hammer and Nivedita
                 Kaluskar and Raghav Kaushik and Rakesh Khanduja and
                 Prasad Mujumdar and Gaurav Malhotra and Pankaj Naik and
                 Nikolas Ogg and Krishna Kumar Parthasarthy and Raghu
                 Ramakrishnan and Vlad Rodriguez and Rahul Sharma and
                 Jakub Szymaszek and Andreas Wolter",
  title =        "{Microsoft Purview}: a System for Central Governance
                 of Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3624--3635",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611552",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611552",
  abstract =     "Modern data estates are spread across data located on
                 premises, on the edge and in one or more public clouds,
                 spread across various sources like multiple relational
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2023:AAI,
  author =       "Liang Lin and Yuhan Li and Bin Wu and Huijun Mai and
                 Renjie Lou and Jian Tan and Feifei Li",
  title =        "{Anser}: Adaptive Information Sharing Framework of
                 {AnalyticDB}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3636--3648",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611553",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611553",
  abstract =     "The surge in data analytics has fostered burgeoning
                 demand for AnalyticDB on Alibaba Cloud, which has well
                 served thousands of customers from \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Brucke:2023:TAI,
  author =       "Christoph Br{\"u}cke and Philipp H{\"a}rtling and
                 Rodrigo D Escobar Palacios and Hamesh Patel and Tilmann
                 Rabl",
  title =        "{TPCx-AI} --- An Industry Standard Benchmark for
                 Artificial Intelligence and Machine Learning Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3649--3661",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611554",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611554",
  abstract =     "Artificial intelligence (AI) and machine learning (ML)
                 techniques have existed for years, but new hardware
                 trends and advances in model training and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Psallidas:2023:OEE,
  author =       "Fotis Psallidas and Ashvin Agrawal and Chandru Sugunan
                 and Khaled Ibrahim and Konstantinos Karanasos and
                 Jes{\'u}s Camacho-Rodr{\'{\i}}guez and Avrilia Floratou
                 and Carlo Curino and Raghu Ramakrishnan",
  title =        "{OneProvenance}: Efficient Extraction of Dynamic
                 Coarse-Grained Provenance from Database Query Event
                 Logs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3662--3675",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611555",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611555",
  abstract =     "Provenance encodes information that connects datasets,
                 their generation workflows, and associated metadata
                 (e.g., who or when executed a query). As \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Srinivasan:2023:TEB,
  author =       "V. Srinivasan and Andrew Gooding and Sunil Sayyaparaju
                 and Thomas Lopatic and Kevin Porter and Ashish Shinde
                 and B. Narendran",
  title =        "Techniques and Efficiencies from Building a Real-Time
                 {DBMS}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3676--3688",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611556",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611556",
  abstract =     "This paper describes a variety of techniques from over
                 a decade of developing Aerospike (formerly Citrusleaf),
                 a real-time DBMS that is being used in some of the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2023:RTW,
  author =       "Jiaqi Wang and Tianyi Li and Anni Wang and Xiaoze Liu
                 and Lu Chen and Jie Chen and Jianye Liu and Junyang Wu
                 and Feifei Li and Yunjun Gao",
  title =        "Real-Time Workload Pattern Analysis for Large-Scale
                 Cloud Databases",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3689--3701",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611557",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611557",
  abstract =     "Hosting database services on cloud systems has become
                 a common practice. This has led to the increasing
                 volume of database workloads, which provides the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:BDA,
  author =       "Jiang Li and Qi Xie and Yan Ma and Jian Ma and
                 Kunshang Ji and Yizhong Zhang and Chaojun Zhang and
                 Yixiu Chen and Gangsheng Wu and Jie Zhang and Kaidi
                 Yang and Xinyi He and Qiuyang Shen and Yanting Tao and
                 Haiwei Zhao and Penghui Jiao and Chengfei Zhu and David
                 Qian and Cheng Xu",
  title =        "Big Data Analytic Toolkit: a General-Purpose, Modular,
                 and Heterogeneous Acceleration Toolkit for Data
                 Analytical Engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3702--3714",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611558",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611558",
  abstract =     "Query compilation and hardware acceleration are
                 important technologies for optimizing the performance
                 of data processing engines. There have been many works
                 on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Shen:2023:LTC,
  author =       "Chunhui Shen and Qianyu Ouyang and Feibo Li and
                 Zhipeng Liu and Longcheng Zhu and Yujie Zou and Qing Su
                 and Tianhuan Yu and Yi Yi and Jianhong Hu and Cen Zheng
                 and Bo Wen and Hanbang Zheng and Lunfan Xu and Sicheng
                 Pan and Bin Wu and Xiao He and Ye Li and Jian Tan and
                 Sheng Wang and Dan Pei and Wei Zhang and Feifei Li",
  title =        "{Lindorm TSDB}: a Cloud-Native Time-Series Database
                 for Large-Scale Monitoring Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3715--3727",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611559",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611559",
  abstract =     "Internet services supported by large-scale distributed
                 systems have become essential for our daily life. To
                 ensure the stability and high quality of services,
                 diverse \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2023:OPH,
  author =       "Zhifeng Yang and Quanqing Xu and Shanyan Gao and
                 Chuanhui Yang and Guoping Wang and Yuzhong Zhao and
                 Fanyu Kong and Hao Liu and Wanhong Wang and Jinliang
                 Xiao",
  title =        "{OceanBase Paetica}: a Hybrid Shared-Nothing\slash
                 Shared-Everything Database for Supporting Single
                 Machine and Distributed Cluster",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3728--3740",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611560",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611560",
  abstract =     "In the ongoing evolution of the OceanBase database
                 system, it is essential to enhance its adaptability to
                 small-scale enterprises. The OceanBase database system
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yao:2023:SEU,
  author =       "Yuanyuan Yao and Dimeng Li and Hailiang Jie and
                  Tianyi Li and Jie Chen and Jiaqi Wang and Feifei Li
                  and Yunjun Gao",
  title =        "{SimpleTS}: an Efficient and Universal Model Selection
                 Framework for Time Series Forecasting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3741--3753",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611561",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611561",
  abstract =     "Time series forecasting, that predicts events through
                 a sequence of time, has received increasing attention
                 in past decades. The diverse range of time series
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yang:2023:PSC,
  author =       "Xinjun Yang and Yingqiang Zhang and Hao Chen and Chuan
                 Sun and Feifei Li and Wenchao Zhou",
  title =        "{PolarDB-SCC}: a Cloud-Native Database Ensuring Low
                 Latency for Strongly Consistent Reads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3754--3767",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611562",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611562",
  abstract =     "A classic design of cloud-native databases adopts an
                 architecture that consists of one read/write (RW) node
                 and one or more read-only (RO) nodes. In such a design,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yamada:2023:SUT,
  author =       "Hiroyuki Yamada and Toshihiro Suzuki and Yuji Ito and
                 Jun Nemoto",
  title =        "{ScalarDB}: Universal Transaction Manager for
                 Polystores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3768--3780",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611563",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611563",
  abstract =     "This paper presents ScalarDB, a universal transaction
                 manager that achieves distributed transactions across
                 multiple disparate databases. ScalarDB \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Nie:2023:APS,
  author =       "Xiaonan Nie and Yi Liu and Fangcheng Fu and Jinbao Xue
                 and Dian Jiao and Xupeng Miao and Yangyu Tao and Bin
                 Cui",
  title =        "{Angel-PTM}: a Scalable and Economical Large-Scale
                 Pre-Training System in {Tencent}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3781--3794",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611564",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611564",
  abstract =     "Recent years have witnessed the unprecedented
                 achievements of large-scale pre-trained models,
                 especially Transformer models. Many products and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:EEE,
  author =       "Ji You Li and Jiachi Zhang and Wenchao Zhou and Yuhang
                 Liu and Shuai Zhang and Zhuoming Xue and Ding Xu and
                 Hua Fan and Fangyuan Zhou and Feifei Li",
  title =        "{Eigen}: End-to-End Resource Optimization for
                 Large-Scale Databases on the Cloud",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3795--3807",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611565",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611565",
  abstract =     "Increasingly, cloud database vendors host large-scale
                 geographically distributed clusters to provide cloud
                 database services. When managing the clusters, we
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pan:2023:MUA,
  author =       "Zhicheng Pan and Yihang Wang and Yingying Zhang and
                 Sean Bin Yang and Yunyao Cheng and Peng Chen and
                 Chenjuan Guo and Qingsong Wen and Xiduo Tian and
                 Yunliang Dou and Zhiqiang Zhou and Chengcheng Yang and
                 Aoying Zhou and Bin Yang",
  title =        "{MagicScaler}: Uncertainty-Aware, Predictive
                 Autoscaling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3808--3821",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611566",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611566",
  abstract =     "Predictive autoscaling is a key enabler for optimizing
                  cloud resource allocation in Alibaba Cloud's computing
                  platforms, which dynamically adjust the Elastic Compute
                  Service \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Povzner:2023:KCN,
  author =       "Anna Povzner and Prince Mahajan and Jason Gustafson
                 and Jun Rao and Ismael Juma and Feng Min and Shriram
                 Sridharan and Nikhil Bhatia and Gopi Attaluri and
                 Adithya Chandra and Stanislav Kozlovski and Rajini
                 Sivaram and Lucas Bradstreet and Bob Barrett and
                 Dhruvil Shah and David Jacot and David Arthur and Ron
                 Dagostino and Colin McCabe and Manikumar Reddy Obili
                 and Kowshik Prakasam and Jose Garcia Sancio and Vikas
                 Singh and Alok Nikhil and Kamal Gupta",
  title =        "{Kora}: a Cloud-Native Event Streaming Platform for
                 {Kafka}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3822--3834",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611567",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611567",
  abstract =     "Event streaming is an increasingly critical
                 infrastructure service used in many industries and
                 there is growing demand for cloud-native solutions.
                 Confluent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Pasupuleti:2023:ASE,
  author =       "Krishna Kantikiran Pasupuleti and Jiakun Li and Hong
                 Su and Mohamed Ziauddin",
  title =        "Automatic {SQL} Error Mitigation in {Oracle}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3835--3847",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611568",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611568",
  abstract =     "Despite best coding practices, software bugs are
                 inevitable in a large codebase. In traditional
                 databases, when errors occur during query processing,
                 they \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2023:PFE,
  author =       "Yanli Zhao and Andrew Gu and Rohan Varma and Liang Luo
                 and Chien-Chin Huang and Min Xu and Less Wright and
                 Hamid Shojanazeri and Myle Ott and Sam Shleifer and
                 Alban Desmaison and Can Balioglu and Pritam Damania and
                 Bernard Nguyen and Geeta Chauhan and Yuchen Hao and
                 Ajit Mathews and Shen Li",
  title =        "{PyTorch FSDP}: Experiences on Scaling Fully Sharded
                 Data Parallel",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "12",
  pages =        "3848--3860",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3611540.3611569",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Mon Sep 18 10:22:20 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3611540.3611569",
  abstract =     "It is widely acknowledged that large models have the
                 potential to deliver superior performance across a
                 broad range of domains. Despite the remarkable
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Qiu:2023:DDO,
  author =       "Lina Qiu and Georgios Kellaris and Nikos Mamoulis and
                 Kobbi Nissim and George Kollios",
  title =        "{Doquet}: Differentially Oblivious Range and Join
                 Queries with Private Data Structures",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4160--4173",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625055",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625055",
  abstract =     "Most cloud service providers offer limited data
                 privacy guarantees, discouraging clients from using
                 them for managing their sensitive data. Cloud providers
                 may use servers with Trusted Execution Environments
                 (TEEs) to protect outsourced data, while \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chiosa:2023:AAC,
  author =       "Monica Chiosa and Thomas B. Preu{\ss}er and Michaela
                 Blott and Gustavo Alonso",
  title =        "{AMNES}: Accelerating the Computation of Data
                 Correlation Using {FPGAs}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4174--4187",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625056",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625056",
  abstract =     "A widely used approach to characterize input data in
                 both databases and ML is computing the correlation
                 between attributes. The operation is supported by all
                 major database engines and ML platforms. However, it is
                 an expensive operation as the number of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Daum:2023:VPY,
  author =       "Maureen Daum and Enhao Zhang and Dong He and Stephen
                 Mussmann and Brandon Haynes and Ranjay Krishna and
                 Magdalena Balazinska",
  title =        "{VOCALExplore}: Pay-as-You-Go Video Data Exploration
                 and Model Building",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4188--4201",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625057",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625057",
  abstract =     "We introduce VOCALExplore, a system designed to
                 support users in building domain-specific models over
                 video datasets. VOCALExplore supports interactive
                 labeling sessions and trains models using user-supplied
                 labels. VOCALExplore maximizes model quality \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Arora:2023:FRA,
  author =       "Pankaj Arora and Surajit Chaudhuri and Sudipto Das and
                 Junfeng Dong and Cyril George and Ajay Kalhan and Arnd
                 Christian K{\"o}nig and Willis Lang and Changsong Li
                 and Feng Li and Jiaqi Liu and Lukas M. Maas and Akshay
                 Mata and Ishai Menache and Justin Moeller and Vivek
                 Narasayya and Matthaios Olma and Morgan Oslake and
                 Elnaz Rezai and Yi Shan and Manoj Syamala and Shize Xu
                 and Vasileios Zois",
  title =        "Flexible Resource Allocation for Relational
                 Database-as-a-Service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4202--4215",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625058",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625058",
  abstract =     "Oversubscription is an essential cost management
                 strategy for cloud database providers, and its
                 importance is magnified by the emerging paradigm of
                 serverless databases. In contrast to general purpose
                 techniques used for oversubscription in hypervisors,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Gu:2023:SEA,
  author =       "Rong Gu and Han Li and Haipeng Dai and Wenjie Huang
                 and Jie Xue and Meng Li and Jiaqi Zheng and Haoran Cai
                 and Yihua Huang and Guihai Chen",
  title =        "{ShadowAQP}: Efficient Approximate Group-by and Join
                 Query via Attribute-Oriented Sample Size Allocation and
                 Data Generation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4216--4229",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625059",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625059",
  abstract =     "Approximate query processing (AQP) is one of the key
                 techniques to cope with big data querying problem on
                 account that it obtains approximate answers
                 efficiently. To address non-trivial sample selection
                 and heavy sampling cost issues in AQP, we propose
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liu:2023:ODP,
  author =       "Rui Liu and Kwanghyun Park and Fotis Psallidas and
                 Xiaoyong Zhu and Jinghui Mo and Rathijit Sen and Matteo
                 Interlandi and Konstantinos Karanasos and Yuanyuan Tian
                 and Jes{\'u}s Camacho-Rodr{\'\i}guez",
  title =        "Optimizing Data Pipelines for Machine Learning in
                 Feature Stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4230--4239",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625060",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625060",
  abstract =     "Data pipelines (i.e., converting raw data to features)
                 are critical for machine learning (ML) models, yet
                 their development and management is time-consuming.
                 Feature stores have recently emerged as a new
                 ``DBMS-for-ML'' with the premise of enabling data
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Angles:2023:SSE,
  author =       "Renzo Angles and Georg Gottlob and Aleksandar
                 Pavlovi{\'c} and Reinhard Pichler and Emanuel
                 Sallinger",
  title =        "{SparqLog}: a System for Efficient Evaluation of
                 {SPARQL 1.1} Queries via {Datalog}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4240--4253",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625061",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625061",
  abstract =     "Over the past decade, Knowledge Graphs have received
                 enormous interest both from industry and from academia.
                 Research in this area has been driven, above all, by
                 the Database (DB) community and the Semantic Web (SW)
                 community. However, there still \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Konig:2023:SLC,
  author =       "Arnd Christian K{\"o}nig and Yi Shan and Karan Newatia
                 and Luke Marshall and Vivek Narasayya",
  title =        "Solver-In-The-Loop Cluster Resource Management for
                 Database-as-a-Service",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4254--4267",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625062",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625062",
  abstract =     "In Database-as-a-Service (DBaaS) clusters, resource
                 management is a complex optimization problem that
                 assigns tenants to nodes, subject to various
                 constraints and objectives. Tenants share resources
                 within a node, however, their resource demands can
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Henneberg:2023:REH,
  author =       "Justus Henneberg and Felix Schuhknecht",
  title =        "{RTIndeX}: Exploiting Hardware-Accelerated {GPU}
                 Raytracing for Database Indexing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4268--4281",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625063",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625063",
  abstract =     "Data management on GPUs has become increasingly
                 relevant due to a tremendous rise in processing power
                 and available GPU memory. Similar to main-memory
                 systems, there is a need for performant GPU-resident
                 index structures to speed up query processing.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lian:2023:CCT,
  author =       "Jinqing Lian and Xinyi Zhang and Yingxia Shao and
                 Zenglin Pu and Qingfeng Xiang and Yawen Li and Bin
                 Cui",
  title =        "{ContTune}: Continuous Tuning by Conservative
                 {Bayesian} Optimization for Distributed Stream Data
                 Processing Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4282--4295",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625064",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625064",
  abstract =     "The past decade has seen rapid growth of distributed
                 stream data processing systems. Under these systems, a
                 stream application is realized as a Directed Acyclic
                 Graph (DAG) of operators, where the level of
                 parallelism of each operator has a substantial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Melissourgos:2023:SUS,
  author =       "Dimitrios Melissourgos and Haibo Wang and Shigang Chen
                 and Chaoyi Ma and Shiping Chen",
  title =        "Single Update Sketch with Variable Counter Structure",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4296--4309",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625065",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625065",
  abstract =     "Per-flow size measurement is key to many streaming
                 applications and management systems, particularly in
                 high-speed networks. Performing such measurement on the
                 data plane of a network device at the line rate
                 requires on-chip memory and computing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Trummer:2023:CLL,
  author =       "Immanuel Trummer",
  title =        "Can Large Language Models Predict Data Correlations
                 from Column Names?",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4310--4323",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625066",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625066",
  abstract =     "Recent publications suggest using natural language
                 analysis on database schema elements to guide tuning
                 and profiling efforts. The underlying hypothesis is
                 that state-of-the-art language processing methods,
                 so-called language models, are able to extract
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chamani:2023:GTO,
  author =       "Javad Ghareh Chamani and Ioannis Demertzis and
                 Dimitrios Papadopoulos and Charalampos Papamanthou and
                 Rasool Jalili",
  title =        "{GraphOS}: Towards Oblivious Graph Processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4324--4338",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625067",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625067",
  abstract =     "We propose GraphOS, a system that allows a client that
                 owns a graph database to outsource it to an untrusted
                 server for storage and querying. It relies on
                 doubly-oblivious primitives and trusted hardware to
                 achieve a very strong privacy and efficiency \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2023:COC,
  author =       "Kefei Wang and Feng Chen",
  title =        "{Catalyst}: Optimizing Cache Management for Large
                 In-memory Key-value Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "16",
  number =       "13",
  pages =        "4339--4352",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3625054.3625068",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:04 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3625054.3625068",
  abstract =     "In-memory key-value cache systems, such as Memcached
                 and Redis, are essential in today's data centers. A key
                 mission of such cache systems is to identify the most
                 valuable data for caching. To achieve this, the current
                 system design keeps track of each \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%% DEBUG 1: do_journal(): Journal = [PVLDB: Vol 17, No 1]
%% DEBUG 2: do_journal(): Journal = [PVLDB]

@Article{Zheng:2023:DDL,
  author =       "Bolong Zheng and Yongyong Gao and Jingyi Wan and
                 Lingsen Yan and Long Hu and Bo Liu and Yunjun Gao and
                 Xiaofang Zhou and Christian S. Jensen",
  title =        "{DecLog}: Decentralized Logging in Non-Volatile Memory
                 for Time Series Database Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "1",
  pages =        "1--14",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3617838.3617839",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:06 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3617838.3617839",
  abstract =     "Growing demands for the efficient processing of
                 extreme-scale time series workloads call for more
                 capable time series database management systems
                 (TSDBMS). Specifically, to maintain consistency and
                 durability of transaction processing, systems employ
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:EDW,
  author =       "Fangyuan Zhang and Mengxu Jiang and Sibo Wang",
  title =        "Efficient Dynamic Weighted Set Sampling and Its
                 Extension",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "1",
  pages =        "15--27",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3617838.3617840",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:06 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3617838.3617840",
  abstract =     "Given a weighted set S of n elements, weighted set
                 sampling (WSS) samples an element in S so that each
                 element a$_i$ is sampled with a probability
                 proportional to its weight w ( a$_i$ ). The classic
                 alias method pre-processes an index in O ( n ) time
                 with O ( n ) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2023:ZLI,
  author =       "Yiming Lin and Sharad Mehrotra",
  title =        "{ZIP}: Lazy Imputation during Query Processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "1",
  pages =        "28--40",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3617838.3617841",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:06 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3617838.3617841",
  abstract =     "This paper develops a query-time missing value
                 imputation framework, entitled ZIP, that modifies
                 relational operators to be imputation aware in order to
                 minimize the joint cost of imputing and query
                 processing. The modified operators use a cost-based
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:FTA,
  author =       "Xunkai Li and Zhengyu Wu and Wentao Zhang and Yinlin
                 Zhu and Rong-Hua Li and Guoren Wang",
  title =        "{FedGTA}: Topology-Aware Averaging for Federated Graph
                 Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "1",
  pages =        "41--50",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3617838.3617842",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:06 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3617838.3617842",
  abstract =     "Federated Graph Learning (FGL) is a distributed
                 machine learning paradigm that enables collaborative
                 training on large-scale subgraphs across multiple local
                 systems. Existing FGL studies fall into two categories:
                 (i) FGL Optimization, which improves \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chang:2023:HPM,
  author =       "Xueqin Chang and Xiangyu Ke and Lu Chen and Congcong
                 Ge and Ziheng Wei and Yunjun Gao",
  title =        "Host Profit Maximization: Leveraging Performance
                 Incentives and User Flexibility",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "1",
  pages =        "51--64",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3617838.3617843",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:06 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3617838.3617843",
  abstract =     "The social network host has knowledge of the network
                 structure and user characteristics and can earn a
                 profit by providing merchants with viral marketing
                 campaigns. We investigate the problem of host profit
                 maximization by leveraging performance \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Patwa:2023:DPP,
  author =       "Shweta Patwa and Danyu Sun and Amir Gilad and Ashwin
                 Machanavajjhala and Sudeepa Roy",
  title =        "{DP-PQD}: Privately Detecting Per-Query Gaps in
                 Synthetic Data Generated by Black-Box Mechanisms",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "1",
  pages =        "65--78",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3617838.3617844",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 5 08:24:06 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3617838.3617844",
  abstract =     "Synthetic data generation methods, and in particular,
                 private synthetic data generation methods, are gaining
                 popularity as a means to make copies of sensitive
                 databases that can be shared widely for research and
                 data analysis. Some of the fundamental \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wei:2023:CSP,
  author =       "Ruidi Wei and Florian Kerschbaum",
  title =        "Cryptographically Secure Private Record Linkage using
                 Locality-Sensitive Hashing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "79--91",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626293",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626293",
  abstract =     "Private record linkage (PRL) is the problem of
                 identifying pairs of records that approximately match
                 across datasets in a secure, privacy-preserving manner.
                 Two-party PRL specifically allows each of the parties
                 to obtain records from the other party, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Arora:2023:LME,
  author =       "Simran Arora and Brandon Yang and Sabri Eyuboglu and
                 Avanika Narayan and Andrew Hojel and Immanuel Trummer
                 and Christopher R{\'e}",
  title =        "Language Models Enable Simple Systems for Generating
                 Structured Views of Heterogeneous Data Lakes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "92--105",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626294",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626294",
  abstract =     "A long standing goal in the data management community
                 is developing systems that input documents and output
                 queryable tables without user effort. Given the sheer
                 variety of potential documents, state-of-the-art
                 systems make simplifying assumptions and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:QRD,
  author =       "Jinyang Li and Yuval Moskovitch and Julia Stoyanovich
                 and H. V. Jagadish",
  title =        "Query Refinement for Diversity Constraint
                 Satisfaction",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "106--118",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626295",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626295",
  abstract =     "Diversity, group representation, and similar needs
                 often apply to query results, which in turn require
                 constraints on the sizes of various subgroups in the
                 result set. Traditional relational queries only specify
                 conditions as part of the query predicate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:EEL,
  author =       "Zhaoheng Li and Pranav Gor and Rahul Prabhu and Hui Yu
                 and Yuzhou Mao and Yongjoo Park",
  title =        "{ElasticNotebook}: Enabling Live Migration for
                 Computational Notebooks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "119--133",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626296",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626296",
  abstract =     "Computational notebooks (e.g., Jupyter, Google Colab)
                 are widely used for interactive data science and
                 machine learning. In those frameworks, users can start
                 a session, then execute cells (i.e., a set of
                 statements) to create variables, train models,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2023:BNL,
  author =       "Kecheng Huang and Zhaoyan Shen and Zili Shao and Tong
                 Zhang and Feng Chen",
  title =        "Breathing New Life into an Old Tree: Resolving Logging
                 Dilemma of {B$^+$}-tree on Modern Computational Storage
                 Drives",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "134--147",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626297",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626297",
  abstract =     "Having dominated databases and various data management
                 systems for decades, B$^+$-tree is infamously subject
                 to a logging dilemma: One could improve B$^+$-tree
                 speed performance by equipping it with a larger log,
                 which nevertheless will degrade its crash \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zeng:2023:EEC,
  author =       "Xinyu Zeng and Yulong Hui and Jiahong Shen and Andrew
                 Pavlo and Wes McKinney and Huanchen Zhang",
  title =        "An Empirical Evaluation of Columnar Storage Formats",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "148--161",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626298",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626298",
  abstract =     "Columnar storage is a core component of a modern data
                 analytics system. Although many database management
                 systems (DBMSs) have proprietary storage formats, most
                 provide extensive support to open-source storage
                 formats such as Parquet and ORC to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yuan:2023:EGA,
  author =       "Yichao Yuan and Haojie Ye and Sanketh Vedula and Wynn
                 Kaza and Nishil Talati",
  title =        "{Everest}: {GPU}-Accelerated System for Mining
                 Temporal Motifs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "162--174",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626299",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626299",
  abstract =     "Temporal motif mining is the task of finding the
                 occurrences of subgraph patterns within a large input
                 temporal graph that obey the specified structural and
                 temporal constraints. Despite its utility in several
                 critical application domains that demand \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2023:BSB,
  author =       "Xueyi Wu and Yuanyuan Xu and Wenjie Zhang and Ying
                 Zhang",
  title =        "Billion-Scale Bipartite Graph Embedding: a
                 Global-Local Induced Approach",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "175--183",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626300",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626300",
  abstract =     "Bipartite graph embedding (BGE), as the fundamental
                 task in bipartite network analysis, is to map each node
                 to compact low-dimensional vectors that preserve
                 intrinsic properties. The existing solutions towards
                 BGE fall into two groups: metric-based \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ni:2023:UAP,
  author =       "Wangze Ni and Pengze Chen and Lei Chen and Peng Cheng
                 and Chen Jason Zhang and Xuemin Lin",
  title =        "Utility-Aware Payment Channel Network Rebalance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "184--196",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626301",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626301",
  abstract =     "The payment channel network (PCN) is a promising
                 solution to increase the throughput of blockchains.
                 However, unidirectional transactions can deplete a
                 user's deposits in a payment channel (PC), reducing the
                 success ratio of transactions (SRoT). To \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:AAB,
  author =       "Pengfei Li and Wenqing Wei and Rong Zhu and Bolin Ding
                 and Jingren Zhou and Hua Lu",
  title =        "{ALECE}: an Attention-based Learned Cardinality
                 Estimator for {SPJ} Queries on Dynamic Workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "197--210",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626302",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626302",
  abstract =     "For efficient query processing, DBMS query optimizers
                 have for decades relied on delicate cardinality
                 estimation methods. In this work, we propose an
                 Attention-based LEarned Cardinality Estimator (ALECE
                 for short) for SPJ queries. The core idea is to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Xia:2023:FLE,
  author =       "Haojun Xia and Zhen Zheng and Yuchao Li and Donglin
                 Zhuang and Zhongzhu Zhou and Xiafei Qiu and Yong Li and
                 Wei Lin and Shuaiwen Leon Song",
  title =        "{Flash-LLM}: Enabling Cost-Effective and
                 Highly-Efficient Large Generative Model Inference with
                 Unstructured Sparsity",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "211--224",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626303",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626303",
  abstract =     "With the fast growth of parameter size, it becomes
                 increasingly challenging to deploy large generative
                 models as they typically require large GPU memory
                 consumption and massive computation. Unstructured model
                 pruning has been a common approach to reduce \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Howard:2023:CCF,
  author =       "Heidi Howard and Fritz Alder and Edward Ashton and
                 Amaury Chamayou and Sylvan Clebsch and Manuel Costa and
                 Antoine Delignat-Lavaud and C{\'e}dric Fournet and
                 Andrew Jeffery and Matthew Kerner and Fotios Kounelis
                 and Markus A. Kuppe and Julien Maffre and Mark
                 Russinovich and Christoph M. Wintersteiger",
  title =        "{Confidential Consortium Framework}: Secure Multiparty
                 Applications with Confidentiality, Integrity, and High
                 Availability",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "225--240",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626304",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626304",
  abstract =     "Confidentiality, integrity protection, and high
                 availability, abbreviated to CIA, are essential
                 properties for trustworthy data systems. The rise of
                 cloud computing and the growing demand for multiparty
                 applications however means that building modern
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Duan:2023:VVL,
  author =       "Sijing Duan and Feng Lyu and Xin Zhu and Yi Ding and
                 Haotian Wang and Desheng Zhang and Xue Liu and Yaoxue
                 Zhang and Ju Ren",
  title =        "{VeLP}: Vehicle Loading Plan Learning from Human
                 Behavior in Nationwide Logistics System",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "241--249",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626305",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626305",
  abstract =     "For a nationwide logistics transportation system, it
                 is critical to make the vehicle loading plans (i.e.,
                 given many packages, deciding vehicle types and
                 numbers) at each sorting and distribution center. This
                 task is currently completed by dispatchers \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Naik:2023:RQS,
  author =       "Aaditya Naik and Aalok Thakkar and Adam Stein and
                 Rajeev Alur and Mayur Naik",
  title =        "Relational Query Synthesis $ \bowtie $ Decision Tree
                 Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "2",
  pages =        "250--263",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3626292.3626306",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Tue Dec 12 09:42:35 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3626292.3626306",
  abstract =     "We study the problem of synthesizing a core fragment
                 of relational queries called select-project-join (SPJ)
                 queries from input-output examples. Search-based
                 synthesis techniques are suited to synthesizing
                 projections and joins by navigating the network
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Yao:2023:RRA,
  author =       "Feng Yao and Qian Tao and Wenyuan Yu and Yanfeng Zhang
                 and Shufeng Gong and Qiange Wang and Ge Yu and Jingren
                 Zhou",
  title =        "{RAGraph}: a Region-Aware Framework for
                 Geo-Distributed Graph Processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "264--277",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632094",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632094",
  abstract =     "In many global businesses of multinational
                 enterprises, graph-structure data is usually
                 geographically distributed in different regions to
                 support low-latency services. Geo-distributed graph
                 processing suffers from the Wide Area Networks (WANs)
                 with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Lin:2023:SDB,
  author =       "Qiuru Lin and Sai Wu and Junbo Zhao and Jian Dai and
                 Meng Shi and Gang Chen and Feifei Li",
  title =        "{SmartLite}: a {DBMS-Based} Serving System for {DNN}
                 Inference in Resource-Constrained Environments",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "278--291",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632095",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632095",
  abstract =     "Many IoT applications require the use of multiple deep
                 neural networks (DNNs) to perform various tasks on
                 low-cost edge devices with limited computation
                 resources. However, existing DNN model serving
                 platforms, such as TensorFlow Serving and TorchServe,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wu:2023:BMC,
  author =       "Shiwen Wu and Qiyu Wu and Honghua Dong and Wen Hua and
                 Xiaofang Zhou",
  title =        "Blocker and Matcher Can Mutually Benefit: a
                 Co-Learning Framework for Low-Resource Entity
                 Resolution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "292--304",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632096",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632096",
  abstract =     "Entity resolution (ER) approaches typically consist of
                 a blocker and a matcher. They share the same goal and
                 cooperate in different roles: the blocker first quickly
                 removes obvious non-matches, and the matcher
                 subsequently determines whether the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ang:2023:TTS,
  author =       "Yihao Ang and Qiang Huang and Yifan Bao and Anthony K.
                 H. Tung and Zhiyong Huang",
  title =        "{TSGBench}: Time Series Generation Benchmark",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "305--318",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632097",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632097",
  abstract =     "Synthetic Time Series Generation (TSG) is crucial in a
                 range of applications, including data augmentation,
                 anomaly detection, and privacy preservation. Although
                 significant strides have been made in this field,
                 existing methods exhibit three key \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Punter:2023:OEM,
  author =       "Wieger R. Punter and Odysseas Papapetrou and Minos
                 Garofalakis",
  title =        "{OmniSketch}: Efficient Multi-Dimensional
                 High-Velocity Stream Analytics with Arbitrary
                 Predicates",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "319--331",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632098",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632098",
  abstract =     "A key need in different disciplines is to perform
                 analytics over fast-paced data streams, similar in
                 nature to the traditional OLAP analytics in relational
                 databases --- i.e., with filters and aggregates.
                 Storing unbounded streams, however, is not a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chung:2023:MBK,
  author =       "Kai Hiu Chung and Alexander Zhou and Yue Wang and Lei
                 Chen",
  title =        "Maximum Balanced $ (k, \epsilon)$-Bitruss Detection in
                 Signed Bipartite Graph",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "332--344",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632099",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632099",
  abstract =     "Signed bipartite graphs represent relationships
                 between two sets of entities, including both positive
                 and negative interactions, allowing for a more
                 comprehensive modeling of real-world networks. In this
                 work, we focus on the detection of cohesive \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:MVI,
  author =       "Xiao Li and Huan Li and Hua Lu and Christian S. Jensen
                 and Varun Pandey and Volker Markl",
  title =        "Missing Value Imputation for Multi-Attribute Sensor
                 Data Streams via Message Propagation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "345--358",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632100",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632100",
  abstract =     "Sensor data streams occur widely in various real-time
                 applications in the context of the Internet of Things
                 (IoT). However, sensor data streams feature missing
                 values due to factors such as sensor failures,
                 communication errors, or depleted batteries. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:IID,
  author =       "Yuhang Chen and Chaoyun Zhang and Minghua Ma and
                 Yudong Liu and Ruomeng Ding and Bowen Li and Shilin He
                 and Saravan Rajmohan and Qingwei Lin and Dongmei
                 Zhang",
  title =        "{ImDiffusion}: Imputed Diffusion Models for
                 Multivariate Time Series Anomaly Detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "359--372",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632101",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632101",
  abstract =     "Anomaly detection in multivariate time series data is
                 of paramount importance for large-scale systems.
                 However, accurately detecting anomalies in such data
                 poses significant challenges due to the need for
                 precise data modeling capability. Existing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Sun:2023:CIP,
  author =       "Dajun Sun and Wei Dong and Ke Yi",
  title =        "Confidence Intervals for Private Query Processing",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "373--385",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632102",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632102",
  abstract =     "Whenever randomness is involved in query processing,
                 confidence intervals are commonly returned to the user
                 to indicate the statistical significance of the query
                 answer. However, this problem has not been explicitly
                 addressed under differential privacy, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Liang:2023:SBF,
  author =       "Zhiyu Liang and Jianfeng Zhang and Chen Liang and
                 Hongzhi Wang and Zheng Liang and Lujia Pan",
  title =        "A Shapelet-Based Framework for Unsupervised
                 Multivariate Time Series Representation Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "386--399",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632103",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632103",
  abstract =     "Recent studies have shown great promise in
                 unsupervised representation learning (URL) for
                 multivariate time series, because URL has the
                 capability in learning generalizable representation for
                 many downstream tasks without using inaccessible
                 labels. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wang:2023:FSE,
  author =       "Letong Wang and Xiangyun Ding and Yan Gu and Yihan
                 Sun",
  title =        "Fast and Space-Efficient Parallel Algorithms for
                 Influence Maximization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "400--413",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632104",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632104",
  abstract =     "Influence Maximization (IM) is a crucial problem in
                 data science. The goal is to find a fixed-size set of
                 highly influential seed vertices on a network to
                 maximize the influence spread along the edges. While IM
                 is NP-hard on commonly used diffusion \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:TEF,
  author =       "Yile Chen and Gao Cong and Cuauhtemoc Anda",
  title =        "{TERI}: an Effective Framework for Trajectory Recovery
                 with Irregular Time Intervals",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "414--426",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632105",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632105",
  abstract =     "The proliferation of trajectory data has facilitated
                 various applications in urban spaces, such as travel
                 time estimation, traffic monitoring, and flow
                 prediction. These applications require a substantial
                 volume of high-quality trajectories as the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:DGS,
  author =       "Yuhan Chen and Haojie Ye and Sanketh Vedula and Alex
                 Bronstein and Ronald Dreslinski and Trevor Mudge and
                 Nishil Talati",
  title =        "Demystifying Graph Sparsification Algorithms in Graph
                 Properties Preservation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "427--440",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632106",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632106",
  abstract =     "Graph sparsification is a technique that approximates
                 a given graph by a sparse graph with a subset of
                 vertices and/or edges. The goal of an effective
                 sparsification algorithm is to maintain specific graph
                 properties relevant to the downstream task while
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cao:2023:GDS,
  author =       "Jiashen Cao and Rathijit Sen and Matteo Interlandi and
                 Joy Arulraj and Hyesoon Kim",
  title =        "{GPU} Database Systems Characterization and
                 Optimization",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "441--454",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632107",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632107",
  abstract =     "GPUs offer massive parallelism and high-bandwidth
                 memory access, making them an attractive option for
                 accelerating data analytics in database systems.
                 However, while modern GPUs possess more resources than
                 ever before (e.g., higher DRAM bandwidth), \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:NDG,
  author =       "Chaoyi Chen and Dechao Gao and Yanfeng Zhang and
                 Qiange Wang and Zhenbo Fu and Xuecang Zhang and Junhua
                 Zhu and Yu Gu and Ge Yu",
  title =        "{NeutronStream}: a Dynamic {GNN} Training Framework
                 with Sliding Window for Graph Streams",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "455--468",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632108",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632108",
  abstract =     "Existing Graph Neural Network (GNN) training
                 frameworks have been designed to help developers easily
                 create performant GNN implementations. However, most
                 existing GNN frameworks assume that the input graphs
                 are static, but ignore that most real-world \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Hildred:2023:CLL,
  author =       "Joshua Hildred and Michael Abebe and Khuzaima
                 Daudjee",
  title =        "{Caerus}: Low-Latency Distributed Transactions for
                 Geo-Replicated Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "469--482",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632109",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632109",
  abstract =     "Distributed deterministic database systems achieve
                 high transaction throughput for geographically
                 replicated data. Supporting transactions with ACID
                 guarantees requires deterministic databases to order
                 transactions globally to dictate execution order. In
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:EEA,
  author =       "Aoqian Zhang and Shuqing Deng and Dongping Cui and Ye
                 Yuan and Guoren Wang",
  title =        "An Experimental Evaluation of Anomaly Detection in
                 Time Series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "483--496",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632110",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632110",
  abstract =     "Anomaly detection in time series data has been studied
                 for decades in both statistics and computer science.
                 Various algorithms have been proposed for different
                 scenarios, such as fraud detection, environmental
                 monitoring, manufacturing, and healthcare. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Singh:2023:FAE,
  author =       "Mukul Singh and Jos{\'e} Cambronero and Sumit Gulwani
                 and Vu Le and Carina Negreanu and Elnaz Nouri and
                 Mohammad Raza and Gust Verbruggen",
  title =        "{FormaT5}: Abstention and Examples for Conditional
                 Table Formatting with Natural Language",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "497--510",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632111",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632111",
  abstract =     "Formatting is an important property in tables for
                 visualization, presentation, and analysis. Spreadsheet
                 software allows users to automatically format their
                 tables by writing data-dependent conditional formatting
                 (CF) rules. Writing such rules is often \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Schonberger:2023:QID,
  author =       "Manuel Sch{\"o}nberger and Immanuel Trummer and
                 Wolfgang Mauerer",
  title =        "Quantum-Inspired Digital Annealing for Join Ordering",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "511--524",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632112",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632112",
  abstract =     "Finding the optimal join order (JO) is one of the most
                 important problems in query optimisation, and has been
                 extensively considered in research and practise. As it
                 involves huge search spaces, approximation approaches
                 and heuristics are commonly used, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Musleh:2023:KSB,
  author =       "Mashaal Musleh and Mohamed F. Mokbel",
  title =        "{Kamel}: a Scalable {BERT}-Based System for Trajectory
                 Imputation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "525--538",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632113",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632113",
  abstract =     "Numerous important applications rely on detailed
                 trajectory data. Yet, unfortunately, trajectory
                 datasets are typically sparse with large spatial and
                 temporal gaps between each two points, which is a major
                 hurdle for their accuracy. This paper presents
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:ETL,
  author =       "Xinyi Zhang and Hong Wu and Yang Li and Zhengju Tang
                 and Jian Tan and Feifei Li and Bin Cui",
  title =        "An Efficient Transfer Learning Based Configuration
                 Adviser for Database Tuning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "539--552",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632114",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632114",
  abstract =     "In recent years, a wide spectrum of database tuning
                 systems have emerged to automatically optimize database
                 performance. However, these systems require a
                 significant number of workload runs to deliver a
                 satisfactory level of database performance, which
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Petralia:2023:ATT,
  author =       "Adrien Petralia and Philippe Charpentier and Themis
                 Palpanas",
  title =        "{ADF \& TransApp}: a Transformer-Based Framework for
                 Appliance Detection Using Smart Meter Consumption
                 Series",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "553--562",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632115",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632115",
  abstract =     "Over the past decade, millions of smart meters have
                 been installed by electricity suppliers worldwide,
                 allowing them to collect a large amount of electricity
                 consumption data, albeit sampled at a low frequency
                 (one point every 30min). One of the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Wooders:2023:RAA,
  author =       "Sarah Wooders and Xiangxi Mo and Amit Narang and Kevin
                 Lin and Ion Stoica and Joseph M. Hellerstein and
                 Natacha Crooks and Joseph E. Gonzalez",
  title =        "{RALF}: Accuracy-Aware Scheduling for Feature Store
                 Maintenance",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "563--576",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632116",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632116",
  abstract =     "Feature stores (also sometimes referred to as
                 embedding stores) are becoming ubiquitous in model
                 serving systems: downstream applications query these
                 stores for auxiliary inputs at inference-time. Stored
                 features are derived by featurizing rapidly \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Huang:2023:ALH,
  author =       "Kaisong Huang and Tianzheng Wang and Qingqing Zhou and
                 Qingzhong Meng",
  title =        "The Art of Latency Hiding in Modern Database Engines",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "577--590",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632117",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632117",
  abstract =     "Modern database engines must well use multicore CPUs,
                 large main memory and fast storage devices to achieve
                 high performance. A common theme is hiding latencies
                 such that more CPU cycles can be dedicated to ``real''
                 work, improving overall throughput. Yet \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Najafi:2023:MSN,
  author =       "Mohammad Matin Najafi and Chenhao Ma and Xiaodong Li
                 and Reynold Cheng and Laks V. S. Lakshmanan",
  title =        "{MOSER}: Scalable Network {Motif} Discovery Using
                 Serial Test",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "591--603",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632118",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632118",
  abstract =     "Given a graph G, a motif (e.g., 3-node clique) is a
                 fundamental building block for G. Recently, motif-based
                 graph analysis has attracted much attention due to its
                 efficacy in tasks such as clustering, ranking, and link
                 prediction. These tasks require \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:CMP,
  author =       "Dongxiang Zhang and Teng Ma and Junnan Hu and Yijun
                 Bei and Kian-Lee Tan and Gang Chen",
  title =        "Co-Movement Pattern Mining from Videos",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "3",
  pages =        "604--616",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3632093.3632119",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:36:59 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3632093.3632119",
  abstract =     "Co-movement pattern mining from GPS trajectories has
                 been an intriguing subject in spatial-temporal data
                 mining. In this paper, we extend this research line by
                 migrating the data source from GPS sensors to
                 surveillance cameras, and presenting the first
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Ge:2023:EAS,
  author =       "Qian Ge and Yu Liu and Yinghao Zhao and Yuetian Sun
                 and Lei Zou and Yuxing Chen and Anqun Pan",
  title =        "Efficient and Accurate {SimRank}-Based Similarity
                 Joins: Experiments, Analysis, and Improvement",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "617--629",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636219",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636219",
  abstract =     "SimRank-based similarity joins, which mainly include
                  threshold-based and top-k similarity joins, are
                 important types of all-pair SimRank queries. Although a
                 line of related algorithms have been proposed recently,
                 they still fall short of providing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Li:2023:ERN,
  author =       "Wentao Li and Maolin Cai and Min Gao and Dong Wen and
                 Lu Qin and Wei Wang",
  title =        "Expanding Reverse Nearest Neighbors",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "630--642",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636220",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636220",
  abstract =     "In a graph, the reverse nearest neighbors (RNN) of
                 vertex f refer to the set of vertices that consider f
                 as their nearest neighbor. When f represents a facility
                 like a subway station, its RNN comprises potential
                 users who prefer the nearest facility. In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhao:2023:ESO,
  author =       "Fuheng Zhao and Divyakant Agrawal and Amr {El Abbadi}
                 and Ahmed Metwally and Claire Mathieu and Michel de
                 Rougemont",
  title =        "Errata for {``SpaceSaving$ \pm $: an Optimal Algorithm
                 for Frequency Estimation and Frequent Items in the
                 Bounded-Deletion Model''}",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "643",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636221",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  note =         "See \cite{Zhao:2022:SPO}.",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636221",
  abstract =     "This errata article points out an implicit assumption
                 in the work of four of us published in VLDB 2022. The
                  SpaceSaving$ \pm $ algorithm in bounded deletion data
                 stream presented in the paper implicitly assumed
                 deletions happen after all insertions. When \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Siddiqui:2023:CET,
  author =       "Tarique Siddiqui and Vivek Narasayya and Marius
                 Dumitru and Surajit Chaudhuri",
  title =        "Cache-Efficient Top-$k$ Aggregation over High
                 Cardinality Large Datasets",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "644--656",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636222",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636222",
  abstract =     "Top-k aggregation queries are widely used in data
                 analytics for summarizing and identifying important
                 groups from large amounts of data. These queries are
                 usually processed by first computing exact aggregates
                 for all groups and then selecting the groups \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cai:2023:ETB,
  author =       "Xinwei Cai and Xiangyu Ke and Kai Wang and Lu Chen and
                 Tianming Zhang and Qing Liu and Yunjun Gao",
  title =        "Efficient Temporal Butterfly Counting and Enumeration
                 on Temporal Bipartite Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "657--670",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636223",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636223",
  abstract =     "Bipartite graphs characterize relationships between
                 two different sets of entities, like actor-movie,
                 user-item, and author-paper. The butterfly, a
                 4-vertices 4-edges (2,2)-biclique, is the simplest
                 cohesive motif in a bipartite graph and is the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhong:2023:TTB,
  author =       "Tianxiong Zhong and Zhiwei Zhang and Guo Lu and Ye
                 Yuan and Yu-Ping Wang and Guoren Wang",
  title =        "{TVM}: a Tile-based Video Management Framework",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "671--684",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636224",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636224",
  abstract =     "With the exponential growth of video data, there is a
                 pressing need for efficient video analysis technology.
                 Modern query frameworks aim to accelerate queries by
                 reducing the frequency of calls to expensive deep
                 neural networks, which often overlook the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Zhang:2023:SCR,
  author =       "Yi Zhang and Jan Deriu and George
                 Katsogiannis-Meimarakis and Catherine Kosten and
                 Georgia Koutrika and Kurt Stockinger",
  title =        "{ScienceBenchmark}: a Complex Real-World Benchmark for
                 Evaluating Natural Language to {SQL} Systems",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "685--698",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636225",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636225",
  abstract =     "Natural Language to SQL systems (NL-to-SQL) have
                 recently shown improved accuracy (exceeding 80\%) for
                 natural language to SQL query translation due to the
                 emergence of transformer-based language models, and the
                  popularity of the Spider benchmark. However,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Chen:2023:DMS,
  author =       "Lu Chen and Chengfei Liu and Rui Zhou and Kewen Liao
                 and Jiajie Xu and Jianxin Li",
  title =        "Densest Multipartite Subgraph Search in Heterogeneous
                 Information Networks",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "699--711",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636226",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636226",
  abstract =     "Cohesive multipartite subgraphs (CMS) in heterogeneous
                 information networks (HINs) uncover closely connected
                 vertex groups of multiple types, enhancing real
                 applications like community search and anomaly
                 detection. However, existing works for HINs pay
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Nagrecha:2023:SOD,
  author =       "Kabir Nagrecha and Arun Kumar",
  title =        "{Saturn}: an Optimized Data System for
                 Multi-Large-Model Deep Learning Workloads",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "712--725",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636227",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636227",
  abstract =     "Large models such as GPT-3 and ChatGPT have
                 transformed deep learning (DL), powering applications
                 that have captured the public's imagination. Such
                 models must be trained on multiple GPUs due to their
                 size and computational load, driving the development
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Cai:2023:BTF,
  author =       "Miao Cai and Junru Shen and Yifan Yuan and Zhihao Qu
                 and Baoliu Ye",
  title =        "{BonsaiKV}: Towards Fast, Scalable, and Persistent
                 Key--Value Stores with Tiered, Heterogeneous Memory
                 System",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "726--739",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636228",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636228",
  abstract =     "Emerging NUMA/CXL-based tiered memory systems with
                 heterogeneous memory devices such as DRAM and NVMM
                 deliver ultrafast speed, large capacity, and data
                 persistence all at once, offering great promise to
                 high-performance in-memory key-value stores. To
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

@Article{Reiner:2023:SEC,
  author =       "Silvan Reiner and Michael Grossniklaus",
  title =        "Sample-Efficient Cardinality Estimation Using
                 Geometric Deep Learning",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "740--752",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636229",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636229",
  abstract =     "In database systems, accurate cardinality estimation
                 is a cornerstone of effective query optimization. In
                 this context, estimators that use machine learning have
                 shown significant promise. Despite their potential, the
                 effectiveness of these learned \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 753--765
%%% (contiguous with the preceding entry's 740--752).
@Article{Zhao:2023:MTS,
  author =       "Kai Zhao and Chenjuan Guo and Yunyao Cheng and Peng
                 Han and Miao Zhang and Bin Yang",
  title =        "Multiple Time Series Forecasting with Dynamic Graph
                 Modeling",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "753--765",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636230",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636230",
  abstract =     "Multiple time series forecasting plays an essential
                 role in many applications. Solutions based on graph
                 neural network (GNN) that deliver state-of-the-art
                 forecasting performance use the relation graph which
                 can capture historical correlations among time
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 766--779.
%%% Compound surname "S. Jensen" is safe in First-von-Last form here
%%% because "Jensen" alone is the surname token.
@Article{Cheng:2023:WGA,
  author =       "Yunyao Cheng and Peng Chen and Chenjuan Guo and Kai
                 Zhao and Qingsong Wen and Bin Yang and Christian S.
                 Jensen",
  title =        "Weakly Guided Adaptation for Robust Time Series
                 Forecasting",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "766--779",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636231",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636231",
  abstract =     "Robust multivariate time series forecasting is crucial
                 in many cyberphysical and Internet of Things
                 applications. Existing state-of-the-art robust
                 forecasting models decompose time series into
                 independent functions covering trends and
                 periodicities. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 780--793.
%%% Key acronym ACA derives from the title ("Algorithmic Complexity
%%% Attacks"), following this file's author:year:acronym key scheme.
@Article{Yang:2023:ACA,
  author =       "Rui Yang and Evgenios M. Kornaropoulos and Yue Cheng",
  title =        "Algorithmic Complexity Attacks on Dynamic Learned
                 Indexes",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "780--793",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636232",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636232",
  abstract =     "Learned Index Structures (LIS) view a sorted index as
                 a model that learns the data distribution, takes a data
                 element key as input, and outputs the predicted
                 position of the key. The original LIS can only handle
                 lookup operations with no support for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 794--807.
%%% Braces around {METER} in the title protect the system acronym from
%%% sentence-casing bibliography styles.
@Article{Zhu:2023:MDC,
  author =       "Jiaqi Zhu and Shaofeng Cai and Fang Deng and Beng Chin
                 Ooi and Wenqiao Zhang",
  title =        "{METER}: a Dynamic Concept Adaptation Framework for
                 Online Anomaly Detection",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "794--807",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636233",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636233",
  abstract =     "Real-time analytics and decision-making require online
                 anomaly detection (OAD) to handle drifts in data
                 streams efficiently and effectively. Unfortunately,
                 existing approaches are often constrained by their
                 limited detection capacity and slow adaptation
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 808--822.
@Article{Zhang:2023:EAL,
  author =       "Hailin Zhang and Penghao Zhao and Xupeng Miao and
                 Yingxia Shao and Zirui Liu and Tong Yang and Bin Cui",
  title =        "Experimental Analysis of Large-Scale Learnable Vector
                 Storage Compression",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "808--822",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636234",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636234",
  abstract =     "Learnable embedding vector is one of the most
                 important applications in machine learning, and is
                 widely used in various database-related domains.
                 However, the high dimensionality of sparse data in
                 recommendation tasks and the huge volume of corpus in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 823--835.
%%% {ML4DB} is braced so the acronym survives style downcasing.
@Article{Zhao:2023:CSC,
  author =       "Yue Zhao and Zhaodonghui Li and Gao Cong",
  title =        "A Comparative Study and Component Analysis of Query
                 Plan Representation Techniques in {ML4DB} Studies",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "823--835",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636235",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636235",
  abstract =     "Query plan is widely used as input in machine learning
                 for databases (ML4DB) research, with query plan
                 representation as a critical step. However, existing
                 studies typically focus on one task, and propose a
                 novel design to represent query plans along \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 836--848.
@Article{Zhuang:2023:TGD,
  author =       "Zeyang Zhuang and Penghui Li and Pingchuan Ma and Wei
                 Meng and Shuai Wang",
  title =        "Testing Graph Database Systems via Graph-Aware
                 Metamorphic Relations",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "836--848",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636236",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636236",
  abstract =     "Graph database systems (GDBs) have supported many
                 important real-world applications such as social
                 networks, logistics, and path planning. Meanwhile,
                 logic bugs are also prevalent in GDBs, leading to
                 incorrect results and severe consequences. However,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 849--862.
%%% "Observatory" is the system's name and is braced so that
%%% sentence-casing bibliography styles do not downcase it, consistent
%%% with {METER}, {FusionFlow}, {CoroGraph}, and {VeriDKG} in the
%%% sibling entries of this issue.
@Article{Cong:2023:OCE,
  author =       "Tianji Cong and Madelon Hulsebos and Zhenjie Sun and
                 Paul Groth and H. V. Jagadish",
  title =        "{Observatory}: Characterizing Embeddings of Relational
                 Tables",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "849--862",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636237",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636237",
  abstract =     "Language models and specialized table embedding models
                 have recently demonstrated strong performance on many
                 tasks over tabular data. Researchers and practitioners
                 are keen to leverage these models in many new
                 application contexts; but limited \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 863--876.
%%% {FusionFlow} and {CPU--GPU} are braced to preserve capitalization
%%% and the en-dash range under sentence-casing styles.
@Article{Kim:2023:FAD,
  author =       "Taeyoon Kim and ChanHo Park and Mansur Mukimbekov and
                 Heelim Hong and Minseok Kim and Ze Jin and Changdae Kim
                 and Ji-Yong Shin and Myeongjae Jeon",
  title =        "{FusionFlow}: Accelerating Data Preprocessing for
                 Machine Learning with {CPU--GPU} Cooperation",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "863--876",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636238",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636238",
  abstract =     "Data augmentation enhances the accuracy of DL models
                 by diversifying training samples through a sequence of
                 data transformations. While recent advancements in data
                 augmentation have demonstrated remarkable efficacy,
                 they often rely on computationally \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 877--890.
%%% {BOSS} is braced to protect the system acronym; "---" is the TeX
%%% em-dash in the title.
@Article{Mohr-Daurat:2023:BAD,
  author =       "Hubert Mohr-Daurat and Xuan Sun and Holger Pirk",
  title =        "{BOSS} --- an Architecture for Database Kernel
                 Composition",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "877--890",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636239",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636239",
  abstract =     "Composable Database System Research has yielded
                 components such as Apache Arrow for Storage, Meta's
                 Velox for processing and Apache Calcite for query
                 planning. What is lacking, however, is a design for a
                 general, efficient and easy-to-use architecture to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 891--903.
%%% {CoroGraph} is braced to protect the camel-case system name.
@Article{Zhi:2023:CBC,
  author =       "Xiangyu Zhi and Xiao Yan and Bo Tang and Ziyao Yin and
                 Yanchao Zhu and Minqi Zhou",
  title =        "{CoroGraph}: Bridging Cache Efficiency and Work
                 Efficiency for Graph Algorithm Execution",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "891--903",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636240",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636240",
  abstract =     "Many systems are designed to run graph algorithms
                 efficiently in memory but they achieve only cache
                 efficiency or work efficiency. We tackle this
                 fundamental trade-off in existing systems by designing
                 CoroGraph, a system that attains both cache \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 904--911.
@Article{Cheng:2023:MSO,
  author =       "Audrey Cheng and Jack Waudby and Hugo Firth and
                 Natacha Crooks and Ion Stoica",
  title =        "Mammoths are Slow: The Overlooked Transactions of
                 Graph Data",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "904--911",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636241",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636241",
  abstract =     "This paper argues for better concurrency control to
                 support mammoth transactions, which read and write to
                 many items. While these requests are prevalent on graph
                 data, few systems support them efficiently. Currently,
                 developers must make the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% Proc. VLDB Endowment 17(4), December 2023, pp. 912--925.
%%% {VeriDKG} and {SPARQL} are braced to protect system name and
%%% acronym from style downcasing.
@Article{Zhou:2023:VVS,
  author =       "Enyuan Zhou and Song Guo and Zicong Hong and Christian
                 S. Jensen and Yang Xiao and Dalin Zhang and Jinwen
                 Liang and Qingqi Pei",
  title =        "{VeriDKG}: a Verifiable {SPARQL} Query Engine for
                 Decentralized Knowledge Graphs",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "17",
  number =       "4",
  pages =        "912--925",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.14778/3636218.3636242",
  ISSN =         "2150-8097",
  ISSN-L =       "2150-8097",
  bibdate =      "Wed Mar 20 07:37:01 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  URL =          "https://dl.acm.org/doi/10.14778/3636218.3636242",
  abstract =     "The ability to decentralize knowledge graphs (KG) is
                 important to exploit the full potential of the Semantic
                 Web and realize the Web 3.0 vision. However,
                 decentralization also renders KGs more prone to attacks
                 with adverse effects on data integrity and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "Proc. VLDB Endowment",
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "https://dl.acm.org/loi/pvldb",
}

%%% [20-Mar-2024] TO DO: v13n11 (July 2020) is STILL not yet published