%%% TeX code placed ahead of the formatted bibliography: inputs the
%%% bibnames.sty macro file and defines \TM as a superscript small-caps TM.
@Preamble{
    "\input bibnames.sty" #
    "\def \TM {${}^{\sc TM}$}"
}
%%% Contact-details string; entries reference it from their
%%% acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}
%%% Journal-name macro; entries use it as the value of their journal field.
@String{j-JDIQ = "Journal of Data and Information
                  Quality (JDIQ)"}
@Article{Madnick:2009:EII,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial for the Inaugural Issue of the {ACM Journal
                 of Data and Information Quality (JDIQ)}",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Madnick:2009:OFD,
  author =       "Stuart E. Madnick and Richard Y. Wang and Yang W. Lee
                 and Hongwei Zhu",
  title =        "Overview and Framework for Data and Information
                 Quality Research",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1515693.1516680",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Awareness of data and information quality issues has
                 grown rapidly in light of the critical role played by
                 the quality of information in our data-intensive,
                 knowledge-based economy. Research in the past two
                 decades has produced a large body of data quality
                 knowledge and has expanded our ability to solve many
                 data and information quality problems. In this article,
                 we present an overview of the evolution and current
                 landscape of data and information quality research. We
                 introduce a framework to characterize the research
                 along two dimensions: topics and methods.
                 Representative papers are cited for purposes of
                 illustrating the issues addressed and the methods used.
                 We also identify and discuss challenges to be addressed
                 in future research.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Li:2009:BAE,
  author =       "Xiao-Bai Li",
  title =        "A {Bayesian} Approach for Estimating and Replacing
                 Missing Categorical Data",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1515693.1515695",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "We propose a new approach for estimating and replacing
                 missing categorical data. With this approach, the
                 posterior probabilities of a missing attribute value
                 belonging to a certain category are estimated using the
                 simple Bayes method. Two alternative methods for
                 replacing the missing value are proposed: The first
                 replaces the missing value with the value having the
                 estimated maximum probability; the second uses a value
                 that is selected with probability proportional to the
                 estimated posterior distribution. The effectiveness of
                 the proposed approach is evaluated based on some
                 important data quality measures for data warehousing
                 and data mining. The results of the experimental study
                 demonstrate the effectiveness of the proposed
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Weber:2009:OSD,
  author =       "Kristin Weber and Boris Otto and Hubert {\"O}sterle",
  title =        "One Size Does Not Fit All---{A} Contingency Approach
                 to Data Governance",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1515693.1515696",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Enterprises need Data Quality Management (DQM) to
                 respond to strategic and operational challenges
                 demanding high-quality corporate data. Hitherto,
                 companies have mostly assigned accountabilities for DQM
                 to Information Technology (IT) departments. They have
                 thereby neglected the organizational issues critical to
                 successful DQM. With data governance, however,
                 companies may implement corporate-wide accountabilities
                 for DQM that encompass professionals from business and
                 IT departments. This research aims at starting a
                 scientific discussion on data governance by
                 transferring concepts from IT governance and
                 organizational theory to the previously largely ignored
                 field of data governance. The article presents the
                 first results of a community action research project on
                 data governance comprising six international companies
                 from various industries. It outlines a data governance
                 model that consists of three components (data quality
                 roles, decision areas, and responsibilities), which
                 together form a responsibility assignment matrix. The
                 data governance model documents data quality roles and
                 their type of interaction with DQM activities. In
                 addition, the article describes a data governance
                 contingency model and demonstrates the influence of
                 performance strategy, diversification breadth,
                 organization structure, competitive strategy, degree of
                 process harmonization, degree of market regulation, and
                 decision-making style on data governance. Based on
                 these findings, companies can structure their specific
                 data governance model.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Heinrich:2009:PDM,
  author =       "B. Heinrich and M. Klier and M. Kaiser",
  title =        "A Procedure to Develop Metrics for Currency and its
                 Application in {CRM}",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1515693.1515697",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Due to the importance of using up-to-date data in
                 information systems, this article analyzes how the
                 data-quality dimension currency can be quantified.
                 Based on several requirements (e.g., normalization and
                 interpretability) and a literature review, we design a
                 procedure to develop probability-based metrics for
                 currency which can be adjusted to the specific
                 characteristics of data attribute values. We evaluate
                 the presented procedure with regard to the requirements
                 and illustrate the applicability as well as its
                 practical benefit. In cooperation with a major German
                 mobile services provider, the procedure was applied in
                 the field of campaign management in order to improve
                 both success rates and profits.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Madnick:2009:ELS,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial Letter for the Special Issue on Data Quality
                 in Databases and Information Systems",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "6:1--6:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1577840.1577841",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Naumann:2009:GES,
  author =       "Felix Naumann and Louiqa Raschid",
  title =        "Guest Editorial for the Special Issue on Data Quality
                 in Databases",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "7:1--7:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1577840.1577842",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Dash:2009:MLN,
  author =       "Manoranjan Dash and Ayush Singhania",
  title =        "Mining in Large Noisy Domains",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1577840.1577843",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In this article we address the issue of how to mine
                 efficiently in large and noisy data. We propose an
                 efficient sampling algorithm ({\em Concise\/}) as a
                 solution for large and noisy data. Concise is far more
                 superior than the Simple Random Sampling ({\em SRS\/})
                 in selecting a representative sample. Particularly when
                 the data is very large and noisy, Concise achieves the
                 maximum gain over SRS. The comparison is in terms of
                 their impact on subsequent data mining tasks,
                 specifically, classification, clustering, and
                 association rule mining. We compared Concise with a few
                 existing noise removal algorithms followed by SRS.
                 Although the accuracy of mining results are similar,
                 Concise spends very little time compared to the
                 existing algorithms because Concise has linear time
                 complexity.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "association rule mining; classification; clustering;
                 data mining; Information filtering; sampling; selection
                 process",
}
@Article{Moustakides:2009:OSR,
  author =       "George V. Moustakides and Vassilios S. Verykios",
  title =        "Optimal Stopping: A Record-Linkage Approach",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "9:1--9:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1577840.1577844",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Record-linkage is the process of identifying whether
                 two separate records refer to the same real-world
                 entity when some elements of the record's identifying
                 information (attributes) agree and others disagree.
                 Existing record-linkage decision methodologies use the
                 outcomes from the comparisons of the whole set of
                 attributes. Here, we propose an alternative scheme that
                 assesses the attributes sequentially, allowing for a
                 decision to be made at any attribute's comparison
                 stage, and thus before exhausting all available
                 attributes. The scheme we develop is optimum in that it
                 minimizes a well-defined average cost criterion while
                 the corresponding optimum solution can be easily mapped
                 into a decision tree to facilitate the record-linkage
                 decision process. Experimental results performed in
                 real datasets indicate the superiority of our
                 methodology compared to existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "9",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "duplicate detection; optimal stopping;
                 Record-linkage",
}
@Article{Klein:2009:RDQ,
  author =       "A. Klein and W. Lehner",
  title =        "Representing Data Quality in Sensor Data Streaming
                 Environments",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "10:1--10:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1577840.1577845",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Sensors in smart-item environments capture data about
                 product conditions and usage to support business
                 decisions as well as production automation processes. A
                 challenging issue in this application area is the
                 restricted quality of sensor data due to limited sensor
                 precision and sensor failures. Moreover, data stream
                 processing to meet resource constraints in streaming
                 environments introduces additional noise and decreases
                 the data quality. In order to avoid wrong business
                 decisions due to dirty data, quality characteristics
                 have to be captured, processed, and provided to the
                 respective business task. However, the issue of how to
                 efficiently provide applications with information about
                 data quality is still an open research problem.\par
                 In this article, we address this problem by presenting
                 a flexible model for the propagation and processing of
                 data quality. The comprehensive analysis of common data
                 stream processing operators and their impact on data
                 quality allows a fruitful data evaluation and
                 diminishes incorrect business decisions. Further, we
                 propose the data quality model control to adapt the
                 data quality granularity to the data stream
                 interestingness.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "10",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "data quality; Data stream processing; smart items",
}
@Article{Embury:2009:IDS,
  author =       "Suzanne M. Embury and Paolo Missier and Sandra Sampaio
                 and R. Mark Greenwood and Alun D. Preece",
  title =        "Incorporating Domain-Specific Information Quality
                 Constraints into Database Queries",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1577840.1577846",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "The range of information now available in queryable
                 repositories opens up a host of possibilities for new
                 and valuable forms of data analysis. Database query
                 languages such as SQL and XQuery offer a concise and
                 high-level means by which such analyses can be
                 implemented, facilitating the extraction of relevant
                 data subsets into either generic or bespoke data
                 analysis environments. Unfortunately, the quality of
                 data in these repositories is often highly variable.
                 The data is still useful, but only if the consumer is
                 aware of the data quality problems and can work around
                 them. Standard query languages offer little support for
                 this aspect of data management. In principle, however,
                 it should be possible to embed constraints describing
                 the consumer's data quality requirements into the query
                 directly, so that the query evaluator can take over
                 responsibility for enforcing them during query
                 processing.\par
                 Most previous attempts to incorporate information
                 quality constraints into database queries have been
                 based around a small number of highly generic quality
                 measures, which are defined and computed by the
                 information provider. This is a useful approach in some
                 application areas but, in practice, quality criteria
                 are more commonly determined by the user of the
                 information not by the provider. In this article, we
                 explore an approach to incorporating quality
                 constraints into database queries where the definition
                 of quality is set by the user and not the provider of
                 the information. Our approach is based around the
                 concept of a {\em quality view}, a configurable quality
                 assessment component into which domain-specific notions
                 of quality can be embedded. We examine how quality
                 views can be incorporated into XQuery, and draw from
                 this the language features that are required in general
                 to embed quality views into any query language. We also
                 propose some syntactic sugar on top of XQuery to
                 simplify the process of querying with quality
                 constraints.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "11",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "database query languages; Information quality; views;
                 XQuery",
}
@Article{Madnick:2009:CPS,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Call for Papers Special Issue on Healthcare
                 Information Quality: The Challenges and Opportunities
                 in Healthcare Systems and Services",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1577840.1577847",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "12",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Madnick:2009:ECW,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editors' Comments: Where the {JDIQ} Articles Come
                 From: Incubating Research in an Emerging Field",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1659225.1659226",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "13",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Sessions:2009:TMD,
  author =       "V. Sessions and M. Valtorta",
  title =        "Towards a Method for Data Accuracy Assessment
                 Utilizing a {Bayesian} Network Learning Algorithm",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1659225.1659227",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "This research develops a data quality algorithm
                 entitled the Accuracy Assessment Algorithm (AAA). This
                 is an extension of research in developing an
                 enhancement to a Bayesian Network (BN) learning
                 algorithm called the Data Quality (DQ) algorithm. This
                 new algorithm is concerned with estimating the accuracy
                 levels of a dataset by assessing the quality of the
                 data with no prior knowledge of the dataset. The AAA
                 and associated metrics were tested using two canonical
                 BNs and one large-scale medical network. The article
                 presents the results regarding the efficacy of the
                 algorithm and the implications for future research and
                 practice.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "14",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "accuracy levels; Bayesian networks; data quality
                 assessment; PC algorithm",
}
@Article{Even:2009:DAD,
  author =       "Adir Even and G. Shankaranarayanan",
  title =        "Dual Assessment of Data Quality in Customer
                 Databases",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1659225.1659228",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Quantitative assessment of data quality is critical
                 for identifying the presence of data defects and the
                 extent of the damage due to these defects. Quantitative
                 assessment can help define realistic quality
                 improvement targets, track progress, evaluate the
                 impacts of different solutions, and prioritize
                 improvement efforts accordingly. This study describes a
                 methodology for quantitatively assessing both impartial
                 {\em and\/} contextual data quality in large datasets.
                 Impartial assessment measures the extent to which a
                 dataset is defective, independent of the context in
                 which that dataset is used. Contextual assessment, as
                 defined in this study, measures the extent to which the
                 presence of defects reduces a dataset's utility, the
                 benefits gained by using that dataset in a specific
                 context. The dual assessment methodology is
                 demonstrated in the context of Customer Relationship
                 Management (CRM), using large data samples from
                 real-world datasets. The results from comparing the two
                 assessments offer important insights for directing
                 quality maintenance efforts and prioritizing quality
                 improvement solutions for this dataset. The study
                 describes the steps and the computation involved in the
                 dual-assessment methodology and discusses the
                 implications for applying the methodology in other
                 business contexts and data environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "15",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "CRM; customer relationship management; databases; Data
                 quality; information value; total data quality
                 management",
}
@Article{Fisher:2009:AMP,
  author =       "Craig W. Fisher and Eitel J. M. Lauria and Carolyn C.
                 Matheus",
  title =        "An Accuracy Metric: Percentages, Randomness, and
                 Probabilities",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1659225.1659229",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Practitioners and researchers regularly refer to error
                 rates or accuracy percentages of databases. The former
                 is the number of cells in error divided by the total
                 number of cells; the latter is the number of correct
                 cells divided by the total number of cells. However,
                 databases may have similar error rates (or accuracy
                 percentages) but differ drastically in the complexity
                 of their accuracy problems. A simple percent does not
                 provide information as to whether the errors are
                 systematic or randomly distributed throughout the
                 database. We expand the accuracy metric to include a
                 randomness measure and include a probability
                 distribution value. The proposed randomness check is
                 based on the Lempel--Ziv (LZ) complexity measure.
                 Through two simulation studies we show that the LZ
                 complexity measure can clearly differentiate as to
                 whether the errors are random or systematic. This
                 determination is a significant first step and is a
                 major departure from the percentage-alone technique.
                 Once it is determined that the errors are random, a
                 probability distribution, Poisson, is used to help
                 address various managerial questions.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "16",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "complexity; Data and information quality; randomness",
}
@Article{Ababneh:2009:CSE,
  author =       "Sufyan Ababneh and Rashid Ansari and Ashfaq Khokhar",
  title =        "Compensated Signature Embedding for Multimedia Content
                 Authentication",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1659225.1659230",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "One of the main goals of digital content
                 authentication and preservation techniques is to
                 guarantee the originality and quality of the
                 information. In this article, robust watermarking is
                 used to embed content-based fragile signatures in
                 multimedia signals to achieve efficient authentication
                 without requiring any third-party reference or side
                 information. To overcome the signature alteration
                 caused by the embedding perturbation and other possible
                 encoding operations, a closed-form compensation
                 technique is proposed for ensuring signature
                 consistency by employing a Lagrangian-based approach. A
                 minimum distortion criterion is used to ensure signal
                 quality. The effectiveness of the proposed approach is
                 investigated with simulations of examples of image
                 authentication in which signatures are designed to
                 reveal tamper localization. Results using quantitative
                 performance criteria show successful authentication
                 over a range of robustness in embedding watermarks
                 using both QIM-DM and spread-spectrum techniques. A
                 comparison with two iterative compensation schemes is
                 also presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "17",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "compensated signature embedding; Content
                 authentication; watermarking",
}
@Article{Madnick:2010:ECA,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "{Editors}' Comments: {ACM Journal of Data and
                 Information Quality (JDIQ)} is alive and well!",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1805286.1805287",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Tremblay:2010:UDM,
  author =       "Monica Chiarini Tremblay and Kaushik Dutta and Debra
                 Vandermeer",
  title =        "Using Data Mining Techniques to Discover Bias Patterns
                 in Missing Data",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1805286.1805288",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In today's data-rich environment, decision makers draw
                 conclusions from data repositories that may contain
                 data quality problems. In this context, missing data is
                 an important and known problem, since it can seriously
                 affect the accuracy of conclusions drawn. Researchers
                 have described several approaches for dealing with
                 missing data, primarily attempting to infer values or
                 estimate the impact of missing data on conclusions.
                 However, few have considered approaches to characterize
                 patterns of bias in missing data, that is, to determine
                 the specific attributes that predict the missingness of
                 data values. Knowledge of the specific systematic bias
                 patterns in the incidence of missing data can help
                 analysts more accurately assess the quality of
                 conclusions drawn from data sets with missing data.
                 This research proposes a methodology to combine a
                 number of Knowledge Discovery and Data Mining
                 techniques, including association rule mining, to
                 discover patterns in related attribute values that help
                 characterize these bias patterns. We demonstrate the
                 efficacy of our proposed approach by applying it on a
                 demo census dataset seeded with biased missing data.
                 The experimental results show that our approach was
                 able to find seeded biases and filter out most seeded
                 noise.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Data quality; missing data; pattern discovery",
}
@Article{Jensen:2010:JCI,
  author =       "Matthew L. Jensen and Judee K. Burgoon and Jay F.
                 {Nunamaker, Jr.}",
  title =        "Judging the Credibility of Information Gathered from
                 Face-to-Face Interactions",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1805286.1805289",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "One of the most pernicious threats to information
                 quality comes through perpetration of deception by
                 information suppliers. Deception undermines many
                 critical dimensions of information quality, such as
                 accuracy, completeness, and believability. Despite this
                 threat, information gatherers are ill equipped to
                 assess the credibility of information suppliers. This
                 work presents a prototype system that examines messages
                 gathered during direct, face-to-face information
                 gathering. The system unobtrusively identifies kinesic
                 and linguistic features that may indicate deception in
                 information suppliers' messages. System use was found
                 to significantly improve assessment ability in
                 between-subjects and within-subjects tests. The
                 improved ability to accurately assess credibility
                 during face-to-face interactions should yield higher
                 information quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Credibility assessment; deception detection;
                 decision-aids; human-computer interaction; information
                 veracity; kinesics; linguistics",
}
@Article{Meda:2010:DDF,
  author =       "Hema S. Meda and Anup Kumar Sen and Amitava Bagchi",
  title =        "On Detecting Data Flow Errors in Workflows",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1805286.1805290",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "When designing a business workflow, it is customary
                 practice to create the control flow structure first and
                 to ensure its correctness. Information about the flow
                 of data is introduced subsequently into the workflow
                 and its correctness is independently verified. Improper
                 specification of data requirements of tasks and XOR
                 splits can cause problems such as wrong branching at
                 XOR splits and the failure of tasks to execute. Here we
                 present a graph traversal algorithm called GTforDF for
                 detecting data flow errors in both nested and
                 unstructured workflows, and illustrate its operation on
                 realistic examples. Two of these have interconnected
                 loops and are free of control flow errors, and the
                 third one is an unstructured loop-free workflow. Our
                 approach extends and generalizes data flow verification
                 methods that have been recently proposed. It also makes
                 use of the concept of corresponding pairs lately
                 introduced in control flow verification. It thus has
                 the potential for development into a unified
                 algorithmic procedure for the concurrent detection of
                 control flow and data flow errors. The correctness of
                 the algorithm has been proved theoretically. It has
                 also been tested experimentally on many examples.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Corresponding pair; Data flow errors; Workflow
                 management",
}
@Article{Magnani:2010:SUM,
  author =       "Matteo Magnani and Danilo Montesi",
  title =        "A Survey on Uncertainty Management in Data
                 Integration",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1805286.1805291",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In the last few years, uncertainty management has come
                 to be recognized as a fundamental aspect of data
                 integration. It is now accepted that it may not be
                 possible to remove uncertainty generated during data
                 integration processes and that uncertainty in itself
                 may represent a source of relevant information. Several
                 issues, such as the aggregation of uncertain mappings
                 and the querying of uncertain mediated schemata, have
                 been addressed by applying well-known uncertainty
                 management theories. However, several problems lie
                 unresolved. This article sketches an initial picture of
                 this highly active research area; it details existing
                 works in the light of a homogeneous framework, and
                 identifies and discusses the leading issues awaiting
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Data integration; uncertainty",
}
@Article{Talburt:2010:CPS,
  author =       "John R. Talburt and Stuart E. Madnick and Yang W.
                 Lee",
  title =        "Call for Papers: Special Issue on Entity Resolution",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1805286.1805292",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Madnick:2011:ESN,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial: In Search of Novel Ideas and Solutions with
                 a Broader Context of Data Quality in Mind",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1891879.1891880",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Blake:2011:EID,
  author =       "Roger Blake and Paul Mangiameli",
  title =        "The Effects and Interactions of Data Quality and
                 Problem Complexity on Classification",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1891879.1891881",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Gelman:2011:GGA,
  author =       "Irit Askira Gelman",
  title =        "{GIGO} or not {GIGO}: The Accuracy of Multi-Criteria
                 Satisficing Decisions",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1891879.1891882",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "9",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Fan:2011:GBN,
  author =       "Xiaoming Fan and Jianyong Wang and Xu Pu and Lizhu
                 Zhou and Bing Lv",
  title =        "On Graph-Based Name Disambiguation",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1891879.1891883",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "10",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ngugi:2011:TBI,
  author =       "Benjamin Ngugi and Beverly K. Kahn and Marilyn
                 Tremaine",
  title =        "Typing Biometrics: Impact of Human Learning on
                 Performance Quality",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1891879.1891884",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "11",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Madnick:2011:ENC,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial Notes: Classification and Assessment of
                 Large Amounts of Data: Examples in the Healthcare
                 Industry and Collaborative Digital Libraries",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "3",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2063504.2063505",
  ISSN =         "1936-1955",
  bibdate =      "Thu Dec 15 09:41:55 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "12",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Lauria:2011:CBT,
  author =       "Eitel J. M. Laur{\'\i}a and Alan D. March",
  title =        "Combining {Bayesian} Text Classification and Shrinkage
                 to Automate Healthcare Coding: a Data Quality
                 Analysis",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "3",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2063504.2063506",
  ISSN =         "1936-1955",
  bibdate =      "Thu Dec 15 09:41:55 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "13",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Dalip:2011:AAD,
  author =       "Daniel Hasan Dalip and Marcos Andr{\'e}
                 Gon{\c{c}}alves and Marco Cristo and P{\'a}vel Calado",
  title =        "Automatic Assessment of Document Quality in {Web}
                 Collaborative Digital Libraries",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "3",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2063504.2063507",
  ISSN =         "1936-1955",
  bibdate =      "Thu Dec 15 09:41:55 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "14",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Muller:2012:IDQ,
  author =       "Heiko M{\"u}ller and Johann-Christoph Freytag and Ulf
                 Leser",
  title =        "Improving data quality by source analysis",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "4",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2107536.2107538",
  ISSN =         "1936-1955",
  bibdate =      "Fri Mar 16 15:01:48 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In many domains, data cleaning is hampered by our
                 limited ability to specify a comprehensive set of
                 integrity constraints to assist in identification of
                 erroneous data. An alternative approach to improve data
                 quality is to exploit different data sources that
                 contain information about the same set of objects. Such
                 overlapping sources highlight hot-spots of poor data
                 quality through conflicting data values and immediately
                 provide alternative values for conflict resolution. In
                 order to derive a dataset of high quality, we can merge
                 the overlapping sources based on a quality assessment
                 of the conflicting values. The quality of the resulting
                 dataset, however, is highly dependent on our ability to
                 assess the quality of conflicting values effectively.
                 The main objective of this article is to introduce
                 methods that aid the developer of an integrated system
                 over overlapping, but contradicting sources in the task
                 of improving the quality of data. Value conflicts
                 between contradicting sources are often systematic,
                 caused by some characteristic of the different sources.
                 Our goal is to identify such systematic differences and
                 outline data patterns that occur in conjunction with
                 them. Evaluated by an expert user, the regularities
                 discovered provide insights into possible conflict
                 reasons and help to assess the quality of inconsistent
                 values. The contributions of this article are two
                 concepts of systematic conflicts: contradiction
                 patterns and minimal update sequences. Contradiction
                 patterns resemble a special form of association rules
                 that summarize characteristic data properties for
                 conflict occurrence. We adapt existing association rule
                 mining algorithms for mining contradiction patterns.
                 Contradiction patterns, however, view each class of
                 conflicts in isolation, sometimes leading to largely
                 overlapping patterns. Sequences of set-oriented update
                 operations that transform one data source into the
                 other are compact descriptions for all regular
                 differences among the sources. We consider minimal
                 update sequences as the most likely explanation for
                 observed differences between overlapping data sources.
                 Furthermore, the order of operations within the
                 sequences point out potential dependencies between
                 systematic differences. Finding minimal update
                 sequences, however, is beyond reach in practice. We
                 show that the problem already is NP-complete for a
                 restricted set of operations. In the light of this
                 intractability result, we present heuristics that lead
                 to convincing results for all examples we considered.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "15",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Gelman:2012:BMC,
  author =       "Irit Askira Gelman",
  title =        "Biases in multi-criteria, satisfying decisions due to
                 data errors",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "4",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2107536.2107539",
  ISSN =         "1936-1955",
  bibdate =      "Fri Mar 16 15:01:48 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "This inquiry centers on an asymmetry, or bias, in the
                 accuracy of multi-criteria, conjunctive, and
                 disjunctive decisions, which originates from
                 fundamental properties of the logical conjunction and
                 disjunction operations. A mathematical-statistical
                 analysis indicates that, as we keep adding criteria to
                 a multi-criteria conjunctive or disjunctive decision
                 rule, errors in the data produce decision errors
                 asymmetrically. As a result, in conjunctive decisions,
                 the probability of a false negative increases while the
                 probability of a false positive decreases. In contrast,
                 in disjunctive decisions, as we keep adding criteria,
                 the probability of a false positive increases while
                 that of a false negative decreases. For instance, in a
                 conjunctive business decision rule, the probability of
                 overlooking a bargain can be far greater than the
                 probability of misjudging an unattractive offer to be a
                 good one. A series of Monte Carlo simulations validates
                 the analytical findings and explores the contribution
                 of several additional factors.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "16",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Sachdeva:2012:SIS,
  author =       "Shelly Sachdeva and Subhash Bhalla",
  title =        "Semantic interoperability in standardized electronic
                 health record databases",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "1",
  pages =        "1:1--1:??",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2166788.2166789",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Different clinics and hospitals have their own
                 information systems to maintain patient data. This
                 hinders the exchange of data among systems (and
                 organizations). Hence there is a need to provide
                 standards for data exchange. In digitized form, the
                 individual patient's medical record can be stored,
                 retrieved, and shared over a network through
                 enhancement in information technology. Thus, electronic
                 health records (EHRs) should be standardized,
                 incorporating semantic interoperability. A subsequent
                 step requires that healthcare professionals and
                 patients get involved in using the EHRs, with the help
                 of technological developments. This study aims to
                 provide different approaches in understanding some
                 current and challenging concepts in health informatics.
                 Successful handling of these challenges will lead to
                 improved quality in healthcare by reducing medical
                 errors, decreasing costs, and enhancing patient care.
                 The study is focused on the following goals: (1)
                 understanding the role of EHRs; (2) understanding the
                 need for standardization to improve quality; (3)
                 establishing interoperability in maintaining EHRs; (4)
                 examining a framework for standardization and
                 interoperability (the openEHR architecture); (5)
                 identifying the role of archetypes for knowledge-based
                 systems; and (6) understanding the difficulties in
                 querying EHR data.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Brown:2012:DQT,
  author =       "Steven Brown and Trent S. Rosenbloom and Shawn P.
                 Hardenbrook and Terry Clark and Elliot Fielstein and
                 Peter Elkin and Ted Speroff",
  title =        "Documentation quality and time costs: a randomized
                 controlled trial of structured entry versus dictation",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "1",
  pages =        "2:1--2:??",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2166788.2166790",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "The Department of Veterans Affairs (VA) performs over
                 800,000 disability exams and distributes over
                 \$37 billion in disability benefits per year.
                 VA developed and deployed a computer-based disability
                 exam documentation system in order to improve exam
                 report quality and timeliness. We conducted a
                 randomized controlled trial comparing joint disability
                 examinations supported by computerized templates to the
                 examinations documented via dictation, to determine if
                 the system met the intended goals or had unintended
                 consequences. Consenting veterans were randomized to
                 undergo exams documented using computerized templates
                 or via dictation. We compared exam report quality,
                 documentation time costs, encounter length, total time
                 to fulfill an exam request with a finalized exam
                 report, and veteran satisfaction. Computer-based
                 templates resulted in disability exam reports that had
                 higher quality scores (p. 0.042) and were returned to
                 the requesting office faster than exam reports created
                 via dictation (p. 0.02). Documentation time and veteran
                 satisfaction were similar for both the documentation
                 techniques. Encounter length was significantly longer
                 for the template group. Computer-based templates
                 impacted the VA disability evaluation system by
                 improving report quality scores and production time and
                 lengthening encounter times. Oversight bodies have
                 called for mandated use of computer-based templates
                 nationwide. We believe mandates regarding use of health
                 information technology should be guided by data
                 regarding its positive and negative impacts.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Sunyaev:2012:SCD,
  author =       "Ali Sunyaev and Dmitry Chornyi",
  title =        "Supporting chronic disease care quality: Design and
                 implementation of a health service and its integration
                 with electronic health records",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "2",
  pages =        "3:1--3:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2184442.2184443",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Chronic medical conditions take a huge toll on lives
                 of a growing number of people and are a major
                 contributor to the rising costs in healthcare. As
                 patients are increasingly willing to take an active
                 part in managing their conditions, chronic disease
                 self-management programs and information systems that
                 support them are recognized for their potential to
                 improve the quality of healthcare delivery. These
                 programs often rely on recording longitudinal patient
                 data and analyzing it. Therefore, maintaining
                 appropriate data quality is important for
                 self-management programs to be efficient and safe. We
                 designed and implemented a prototype of a health
                 self-management service for chronically ill people. It
                 is a distributed application that supports patients
                 with diabetes at tracking their blood glucose levels.
                 The main design goals were usability, extensibility,
                 security, and interoperability. The system integrates
                 with the Microsoft HealthVault and Google Health
                 personal health record platforms. It utilizes
                 industry-strength storage and security mechanisms, is
                 scalable, and as a result, can be used to gather,
                 securely store, and analyze patient data over long
                 periods of time. In this article we examine how
                 software information technology can support chronic
                 disease self-management and its impact on the quality
                 of patient data. Furthermore, we describe the
                 requirements that drove the system's development, its
                 architecture, and design decisions.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Elizabeth:2012:NSA,
  author =       "D. Shiloah Elizabeth and H. Khanna Nehemiah and C.
                 Sunil Retmin Raj and A. Kannan",
  title =        "A novel segmentation approach for improving diagnostic
                 accuracy of {CAD} systems for detecting lung cancer
                 from chest computed tomography images",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "2",
  pages =        "4:1--4:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2184442.2184444",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Segmentation of lung tissue is an important and
                 challenging task in any computer aided diagnosis
                 system. The accuracy of the segmentation subsystem
                 determines the performance of the other subsystems in
                 any computer aided diagnosis system based on image
                 analysis. We propose a novel technique for segmentation
                 of lung tissue from computed tomography of the chest.
                 Manual segmentation of lung parenchyma becomes
                 difficult with an enormous volume of images. The goal
                 of this work is to present an automated approach to
                 segmentation of lung parenchyma from the rest of the
                 chest CT image. The approach involves the conventional
                 optimal thresholding technique and operations based on
                 convex edge and centroid properties of the lung region.
                 The segmentation technique proposed in this article can
                 be used to preprocess lung images given to a computer
                 aided diagnosis system for diagnosis of lung disorders.
                 This improves the diagnostic performance of the system.
                 This has been tested by using it in a computer aided
                 diagnosis system that was used for detection of lung
                 cancer from chest computed tomography images. The
                 results obtained show that the lungs can be correctly
                 segmented even in the presence of peripheral pathology
                 bearing regions; pathology bearing regions that could
                 not be detected using a CAD system that applies optimal
                 thresholding could be detected using a CAD system using
                 our proposed approach for segmentation of lungs.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Yakout:2012:EPA,
  author =       "Mohamed Yakout and Mikhail J. Atallah and Ahmed
                 Elmagarmid",
  title =        "Efficient and Practical Approach for Private Record
                 Linkage",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "3",
  pages =        "5:1--5:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2287714.2287715",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:13 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Record linkage is used to associate entities from
                 multiple data sources. For example, two organizations
                 contemplating a merger may want to know how common
                 their customer bases are so that they may better assess
                 the benefits of the merger. Another example is a
                 database of people who are forbidden from a certain
                 activity by regulators, may need to be compared to a
                 list of people engaged in that activity. The autonomous
                 entities who wish to carry out the record matching
                 computation are often reluctant to fully share their
                 data; they fear losing control over its subsequent
                 dissemination and usage, or they want to insure privacy
                 because the data is proprietary or confidential, and/or
                 they are cautious simply because privacy laws forbid
                 its disclosure or regulate the form of that disclosure.
                 In such cases, the problem of carrying out the linkage
                 computation without full data exchange has been called
                 private record linkage. Previous private record linkage
                 techniques have made use of a third party. We provide
                 efficient techniques for private record linkage that
                 improve on previous work in that (1) our techniques
                 make no use of a third party, and (2) they achieve much
                 better performance than previous schemes in terms of
                 their execution time while maintaining acceptable
                 quality of output compared to nonprivacy settings. Our
                 protocol consists of two phases. The first phase
                 primarily produces candidate record pairs for matching,
                 by carrying out a very fast (but not accurate) matching
                 between such pairs of records. The second phase is a
                 novel protocol for efficiently computing distances
                 between each candidate pair (without any expensive
                 cryptographic operations such as modular
                 exponentiations). Our experimental evaluation of our
                 approach validates these claims.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Yang:2012:ECD,
  author =       "Yanjuan Yang and Michael Mannino",
  title =        "An Experimental Comparison of a Document Deception
                 Detection Policy using Real and Artificial Deception",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "3",
  pages =        "6:1--6:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2287714.2287716",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:13 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Developing policies to screen documents for deception
                 is often hampered by the cost of data collection and
                 the inability to evaluate policy alternatives due to
                 lack of data. To lower data collection costs and
                 increase the amount of data, artificially generated
                 deception data can be used, but the impact of using
                 artificially generated deception data is not well
                 understood. This article studies the impact of
                 artificially generated deception on document screening
                 policies. The deception and truth data were collected
                 from financial aid applications, a document-centric
                 area with limited resources for screening. Real
                 deception was augmented with artificial data generated
                 by noise and deception generation models. Using the
                 real data and artificially generated data, we designed
                 an innovative experiment with deception type and
                 deception rate as factors, and harmonic mean and cost
                 as outcome variables. We used two budget models (fixed
                 and variable) typically employed by financial aid
                 offices to measure the cost of noncompliance in
                 financial aid applications. The analysis included an
                 evaluation of a common policy for deception screening
                 using both fixed and varying screening rates. The
                 results of the experiment provided evidence of similar
                 performance of screening policy with real and
                 artificial deception, suggesting the possibility of
                 using artificially generated deception to reduce the
                 costs associated with obtaining training data.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Robb:2012:INU,
  author =       "David A. Robb and Paul L. Bowen and A. Faye Borthick
                 and Fiona H. Rohde",
  title =        "Improving New Users' Query Performance: Deterring
                 Premature Stopping of Query Revision with Information
                 for Forming Ex Ante Expectations",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "4",
  pages =        "7:1--7:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348828.2348829",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "As the volume of data in organizational databases
                 grows, organizations are seeking to use this data to
                 improve organizational success. To this end, users are
                 being asked to query these databases to provide
                 information to help answer questions posed by key
                 management personnel. Users who have had extensive
                 experience with an organization's data can often detect
                 the presence of errors in their queries when query
                 results do not correspond to their ex ante
                 expectations. New users, however, are less familiar
                 with the data they will be querying. Having no, or
                 limited, ex ante expectations for query results, new
                 users may be unaware that the result produced by their
                 query is incorrect. Unwarranted confidence in the
                 correctness of their queries predisposes these users to
                 stop looking for query errors even when their queries
                 still contain errors. This behavior, premature stopping
                 of query revision, prompts investigating whether new
                 users' query performance would improve if they were not
                 only provided with, but used, readily available
                 information to form ex ante expectations. Our results
                 demonstrated a threshold effect in new users heeding
                 information for forming ex ante expectations. That is,
                 the mere availability of information for forming ex
                 ante expectations made no difference in query
                 performance. When admonishing users to heed ex ante
                 information, however, there was an associated increase
                 in the accuracy of their queries. These results suggest
                 that users unfamiliar with a particular database might
                 make fewer query errors if they not only received
                 readily available information but were then prompted to
                 use the information to form ex ante expectations for
                 query results.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Varol:2012:HMA,
  author =       "Cihan Varol and Coskun Bayrak",
  title =        "Hybrid Matching Algorithm for Personal Names",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "4",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2348828.2348830",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib;
                 https://www.math.utah.edu/pub/tex/bib/spell.bib",
  abstract =     "Companies acquire personal information from phone,
                 World Wide Web, or email in order to sell or send an
                 advertisement about their product. However, when this
                 information is acquired, moved, copied, or edited, the
                 data may lose its quality. Often, the use of data
                 administrators or a tool that has limited capabilities
                 to correct the mistyped information can cause many
                 problems. Moreover, most of the correction techniques
                 are particularly implemented for the words used in
                 daily conversations. Since personal names have
                 different characteristics compared to general text, a
                 hybrid matching algorithm (PNRS) which employs phonetic
                 encoding, string matching and statistical facts to
                 provide a possible candidate for misspelled names is
                 developed. At the end, the efficiency of the proposed
                 algorithm is compared with other well known spelling
                 correction techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{ODonoghue:2012:ISI,
  author =       "John O'Donoghue and Jane Grimson and Katherine
                 Seelman",
  title =        "Introduction to the Special Issue on Information
                 Quality: The Challenges and Opportunities in Healthcare
                 Systems and Services",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2378016.2378017",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Collins:2012:CGF,
author = {Claire Collins and Kelly Janssens},
title = {Creating a General (Family) Practice Epidemiological
Database in {Ireland} --- Data Quality Issue
Management},
journal = j-JDIQ,
volume = {4},
number = {1},
pages = {2:1--2:??},
month = oct,
year = {2012},
CODEN = {????},
DOI = {https://doi.org/10.1145/2378016.2378018},
ISSN = {1936-1955},
bibdate = {Thu Nov 8 18:27:14 MST 2012},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {In Ireland, while detailed information is available
regarding hospital attendance, little is known
regarding general (family) practice attendance.
However, it is conservatively estimated that there are
almost nine times as many general practice encounters
than there are hospital encounters each year in
Ireland. This represents a very significant gap in
health information. Indeed, general practice has been
shown in other countries to be an important and rich
source of information about the health of the
population, their behaviors and their utilization of
health services. Funded by the Health Information and
Quality Authority (HIQA), the Irish College of General
Practitioners (ICGP) undertook a feasibility study of
diagnostic coding of routinely entered patient data and
the creation of a national general practice morbidity
and epidemiological database (GPMED project). This
article outlines the process of data quality issue
management undertaken. The study's findings suggest
that the quality of data collection and reporting
structures available in general practice throughout
Ireland at the outset of this project were not adequate
to permit the creation of a database of sufficient
quality for service planning and policy or
epidemiological research. Challenges include the dearth
of a minimum standard of data recorded in consultations
by GPs and the absence of the digital data recording
and exporting infrastructure within Irish patient
management software systems. In addition, there is at
present a lack of recognition regarding the value of
such data for patient management and service
planning---including importantly, data collectors who
do not fully accept the merit of maintaining data,
which has a direct consequence for data quality. The
work of this project has substantial implications for
the data available to the health sector in Ireland and
contributes to the knowledge base internationally
regarding general practice morbidity data.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {2},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Cure:2012:IDQ,
author = {Olivier Cur{\'e}},
title = {Improving the Data Quality of Drug Databases using
Conditional Dependencies and Ontologies},
journal = j-JDIQ,
volume = {4},
number = {1},
pages = {3:1--3:??},
month = oct,
year = {2012},
CODEN = {????},
DOI = {https://doi.org/10.1145/2378016.2378019},
ISSN = {1936-1955},
bibdate = {Thu Nov 8 18:27:14 MST 2012},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {Many health care systems and services exploit drug
related information stored in databases. The poor data
quality of these databases, e.g. inaccuracy of drug
contraindications, can lead to catastrophic
consequences for the health condition of patients.
Hence it is important to ensure their quality in terms
of data completeness and soundness. In the database
domain, standard Functional Dependencies (FDs) and
INclusion Dependencies (INDs), have been proposed to
prevent the insertion of incorrect data. But they are
generally not expressive enough to represent a
domain-specific set of constraints. To this end,
conditional dependencies, i.e. standard dependencies
extended with tableau patterns containing constant
values, have been introduced and several methods have
been proposed for their discovery and representation.
The quality of drug databases can be considerably
improved by their usage. Moreover, pharmacology
information is inherently hierarchical and many
standards propose graph structures to represent them,
e.g. the Anatomical Therapeutic Chemical classification
(ATC) or OpenGalen's terminology. In this article, we
emphasize that the technologies of the Semantic Web are
adapted to represent these hierarchical structures,
i.e. in RDFS and OWL. We also present a solution for
representing conditional dependencies using a query
language defined for these graph oriented structures,
namely SPARQL. The benefits of this approach are
interoperability with applications and ontologies of
the Semantic Web as well as a reasoning-based query
execution solution to clean underlying databases.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {3},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{McNaull:2012:DIQ,
author = {James McNaull and Juan Carlos Augusto and Maurice
Mulvenna and Paul McCullagh},
title = {Data and Information Quality Issues in Ambient
Assisted Living Systems},
journal = j-JDIQ,
volume = {4},
number = {1},
pages = {4:1--4:??},
month = oct,
year = {2012},
CODEN = {????},
DOI = {https://doi.org/10.1145/2378016.2378020},
ISSN = {1936-1955},
bibdate = {Thu Nov 8 18:27:14 MST 2012},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {Demographic aging, as a result of people living for
longer, has put an increased burden on health and
social care provision across most of the economies of
the developed and developing world. In order to cope
with the greater numbers of older people, together with
increasing prevalence of chronic diseases, governments
are looking to new ways to provide care and support to
older people and their care providers. A growing trend
is where health and social care providers are moving
towards the use of assisted living technologies to
provide care and assistance in the home. In this
article, the research area of Ambient Assisted Living
(AAL) systems is examined and the data, information and
the higher-level contextual knowledge quality issues in
relation to these systems, is discussed. Lack of
quality control may result in an AAL system providing
assistance and support based upon incorrect data,
information and knowledge inputs, and this may have a
detrimental effect on the person making use of the
system. We propose a model whereby contextual knowledge
gained during the AAL system's reasoning cycle can be
fed back to aid in further quality checking at the
various architectural layers, and a realistic AAL
scenario is provided to support this. Future research
should be conducted in these areas, with the
requirement of building quality criteria into the
design and implementation of AAL systems.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {4},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{ODonoghue:2012:DMW,
author = {John O'Donoghue and John Herbert},
title = {Data Management within {mHealth} Environments: Patient
Sensors, Mobile Devices, and Databases},
journal = j-JDIQ,
volume = {4},
number = {1},
pages = {5:1--5:??},
month = oct,
year = {2012},
CODEN = {????},
DOI = {https://doi.org/10.1145/2378016.2378021},
ISSN = {1936-1955},
bibdate = {Thu Nov 8 18:27:14 MST 2012},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {Pervasive environments generate large quantities of
data, originating from backend servers, portable
devices, and wireless mobile sensors. Pervasive sensing
devices that monitor properties of the environment
(including human beings) can be a large data source.
Unprocessed datasets may include data that is faulty
and irrelevant, and data that is important and useful.
If not managed correctly the large amount of data from
a data-rich pervasive environment may result in
information overload or delivery of incorrect
information. Context-sensitive quality data management
aims to gather, verify, process, and manage the
multiple data sources in a pervasive environment in
order to deliver high quality, relevant information to
the end-user. Managing the quality of data from
different sources, correlating related data, and making
use of context, are all essential in providing end
users with accurate and meaningful data in real time.
This requirement is especially true for critical
applications such as in a medical environment. This
article presents the Data Management System (DMS)
architecture. It is designed to deliver quality data
service to its users. The DMS architecture employs an
agent-based middleware to intelligently and effectively
manage all pervasive data sources, and to make use of
context to deliver relevant information to the
end-user. Two of the DMS components are presented: (1)
data validation and (2) data consistency. The DMS
components have been rigorously evaluated using various
medical-based test cases. This article demonstrates a
careful, precise approach to data based on the quality
of the data and the context of its use. It emphasises
the DMS architecture and the role of software agents in
providing quality data management.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {5},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Talburt:2013:SIE,
author = {John R. Talburt},
title = {Special Issue on Entity Resolution Overview: The
Criticality of Entity Resolution in Data and
Information Quality},
journal = j-JDIQ,
volume = {4},
number = {2},
pages = {6:1--6:??},
month = mar,
year = {2013},
CODEN = {????},
DOI = {https://doi.org/10.1145/2435221.2435222},
ISSN = {1936-1955},
bibdate = {Sat Jun 22 12:13:00 MDT 2013},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {6},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Song:2013:DIE,
author = "Dezhao Song and Jeff Heflin",
title = "Domain-Independent Entity Coreference for Linking
Ontology Instances",
journal = j-JDIQ,
volume = "4",
number = "2",
pages = "7:1--7:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435221.2435223",
ISSN = "1936-1955",
bibdate = "Sat Jun 22 12:13:00 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "The objective of entity coreference is to determine if
different mentions (e.g., person names, place names,
database records, ontology instances, etc.) refer to
the same real world object. Entity coreference
algorithms can be used to detect duplicate database
records and to determine if two Semantic Web instances
represent the same underlying real world entity. The key
issues in developing an entity coreference algorithm
include how to locate context information and how to
utilize the context appropriately. In this article, we
present a novel entity coreference algorithm for
ontology instances. For scalability reasons, we select
a neighborhood of each instance from an RDF graph. To
determine the similarity between two instances, our
algorithm computes the similarity between comparable
property values in the neighborhood graphs. The
similarity of distinct URIs and blank nodes is computed
by comparing their outgoing links. In an attempt to
reduce the impact of distant nodes on the final
similarity measure, we explore a distance-based
discounting approach. To provide the best possible
domain-independent matches, we propose an approach to
compute the discriminability of triples in order to
assign weights to the context information. We evaluated
our algorithm using different instance categories from
five datasets. Our experiments show that the best
results are achieved by including both our discounting
and triple discrimination approaches.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Nuray-Turan:2013:ACS,
author = {Rabia Nuray-Turan and Dmitri V. Kalashnikov and Sharad
Mehrotra},
title = {Adaptive Connection Strength Models for
Relationship-Based Entity Resolution},
journal = j-JDIQ,
volume = {4},
number = {2},
pages = {8:1--8:??},
month = mar,
year = {2013},
CODEN = {????},
DOI = {https://doi.org/10.1145/2435221.2435224},
ISSN = {1936-1955},
bibdate = {Sat Jun 22 12:13:00 MDT 2013},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {Entity Resolution (ER) is a data quality challenge
that deals with ambiguous references in data and whose
task is to identify all references that co-refer. Due
to practical significance of the ER problem, many
creative ER techniques have been proposed in the past,
including those that analyze relationships that exist
among entities in data. Such approaches view the
database as an entity-relationship graph, where direct
and indirect relationships correspond to paths in the
graph. These techniques rely on measuring the
connection strength among various nodes in the graph by
using a connection strength (CS) model. While such
approaches have demonstrated significant advantage over
traditional ER techniques, currently they also have a
significant limitation: the CS models that they use are
intuition-based fixed models that tend to behave well
in general, but are very generic and not tuned to a
specific domain, leading to suboptimal result quality.
Hence, in this article we propose an approach that
employs supervised learning to adapt the connection
strength measure to the given domain using the
available past/training data. The adaptive approach has
several advantages: it increases both the quality and
efficiency of ER and it also minimizes the domain
analyst participation needed to tune the CS model to
the given domain. The extensive empirical evaluation
demonstrates that the proposed approach reaches up to
8\% higher accuracy than the graph-based ER methods
that use fixed and intuition-based CS models.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {8},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Panse:2013:IHU,
author = {Fabian Panse and Maurice van Keulen and Norbert
Ritter},
title = {Indeterministic Handling of Uncertain Decisions in
Deduplication},
journal = j-JDIQ,
volume = {4},
number = {2},
pages = {9:1--9:??},
month = mar,
year = {2013},
CODEN = {????},
DOI = {https://doi.org/10.1145/2435221.2435225},
ISSN = {1936-1955},
bibdate = {Sat Jun 22 12:13:00 MDT 2013},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {In current research and practice, deduplication is
usually considered as a deterministic approach in which
database tuples are either declared to be duplicates or
not. In ambiguous situations, however, it is often not
completely clear-cut, which tuples represent the same
real-world entity. In deterministic approaches, many
realistic possibilities may be ignored, which in turn
can lead to false decisions. In this article, we
present an indeterministic approach for deduplication
by using a probabilistic target model including
techniques for proper probabilistic interpretation of
similarity matching results. Thus, instead of deciding
for one of the most likely situations, all realistic
situations are modeled in the resultant data. This
approach minimizes the negative impact of false
decisions. Moreover, the deduplication process becomes
almost fully automatic and human effort can be largely
reduced. To increase applicability, we introduce
several semi-indeterministic methods that heuristically
reduce the set of indeterministically handled decisions
in several meaningful ways. We also describe a
full-indeterministic method for theoretical and
presentational reasons.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {9},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Zhou:2013:GLC,
author = "Yinle Zhou and Eric Nelson and Fumiko Kobayashi and
John R. Talburt",
title = "A Graduate-Level Course on Entity Resolution and
Information Quality: a Step toward {ER} Education",
journal = j-JDIQ,
volume = "4",
number = "2",
pages = "10:1--10:??",
month = mar,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2435221.2435226",
ISSN = "1936-1955",
bibdate = "Sat Jun 22 12:13:00 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "This article discusses the topics, approaches, and
lessons learned in teaching a graduate-level course
covering entity resolution (ER) and its relationship to
information quality (IQ). The course surveys a broad
spectrum of ER topics and activities including entity
reference extraction, entity reference preparation,
entity reference resolution techniques, entity identity
management, and entity relationship analysis. The
course content also attempts to balance aspects of ER
theory with practical application through a series of
laboratory exercises coordinated with the lecture
topics. As an additional teaching aid, a configurable,
open-source entity resolution engine (OYSTER) was
developed that allows students to experiment with
different types of ER architectures including
merge-purge, record linking, identity resolution, and
identity capture.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Cao:2013:NAD,
author = "Lan Cao and Hongwei Zhu",
title = "Normal Accidents: Data Quality Problems in
{ERP}-Enabled Manufacturing",
journal = j-JDIQ,
volume = "4",
number = "3",
pages = "11:1--11:??",
month = may,
year = "2013",
CODEN = "????",
ISSN = "1936-1955",
bibdate = "Sat Jun 22 12:13:05 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "The efficient operation of Enterprise Resource
Planning (ERP) systems largely depends on data quality.
ERP can improve data quality and information sharing
within an organization. It can also pose challenges to
data quality. While it is well known that data quality
is important in ERP systems, most existing research has
focused on identifying the factors affecting the
implementation and the business values of ERP. With
normal accident theory as a theoretical lens, we
examine data quality problems in ERP using a case study
of a large, fast-growing multinational manufacturer
headquartered in China. Our findings show that
organizations that have successfully implemented ERP
can still experience certain data quality problems. We
identify major data quality problems in data
production, storage and maintenance, and utilization
processes. We also analyze the causes of these data
quality problems by linking them to certain
characteristics of ERP systems within an organizational
context. Our analysis shows that problems resulting
from the tight coupling effects and the complexity of
ERP-enabled manufacturing systems can be inevitable.
This study will help researchers and practitioners
formulate data management strategies that are effective
in the presence of certain ``normal'' data quality
problems.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Biran:2013:CII,
author = "Dov Biran and Michael H. Zack and Richard J. Briotta",
title = "Competitive Intelligence and Information Quality: a
Game-Theoretic Perspective",
journal = j-JDIQ,
volume = "4",
number = "3",
pages = "12:1--12:??",
month = may,
year = "2013",
CODEN = "????",
ISSN = "1936-1955",
bibdate = "Sat Jun 22 12:13:05 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "To better understand a competitor's tactical and
strategic plans, companies need to take a closer look
at competitive intelligence or they risk missing
lucrative opportunities. Because of this there is a
growing interest in competitive intelligence and
intelligence information gathering systems (IIS). This
article uses game-theoretic concepts to develop an
analytic framework to assess the value of deploying a
competitive intelligence gathering information system.
Modeling the competitive environment as a game provides
a useful approach to study and evaluate competitive
strategies given diverse assumptions about the quality
of the information known by the players. When
determining the value of deploying an IIS, decision
makers need to examine three components of the
competitive environment: the competitive rules of the
game, the state of player knowledge, and the
reliability of the information gathered. This framework
focuses on competitive environments where the players'
state of knowledge (i.e., common versus covert
knowledge) and the reliability of the information
generated are essential to the decision making process.
The article concludes with implications for research
and practice.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Joglekar:2013:AAD,
author = "Nitin R. Joglekar and Edward G. Anderson and G.
Shankaranarayanan",
title = "Accuracy of Aggregate Data in Distributed Project
Settings: Model, Analysis and Implications",
journal = j-JDIQ,
volume = "4",
number = "3",
pages = "13:1--13:??",
month = may,
year = "2013",
CODEN = "????",
ISSN = "1936-1955",
bibdate = "Sat Jun 22 12:13:05 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "We examine the management of data accuracy in
inter-organizational data exchanges using the context
of distributed software projects. Organizations
typically manage projects by outsourcing portions of
the project to partners. Managing a portfolio of such
projects requires sharing data regarding the status of
work-in-progress residing with the partners and
estimates of these projects' completion times.
Portfolio managers use these data to assign projects to
be outsourced to partners. These data are rarely
accurate. Unless these data are filtered, inaccuracies
can lead to myopic and expensive sourcing decisions. We
develop a model that uses project-status data to
identify an optimal assignment of projects to be
outsourced. This model permits corruption of
project-status data. We use this model to compute the
costs of using perfect versus inaccurate project-status
data and show that the costs of deviation from optimal
are sizable when the inaccuracy in the data is
significant. We further propose a filter to correct
inaccurate project-status data and generate an estimate
of true progress. With this filter, depending on the
relative magnitudes of errors, we show that accuracy of
project-status data can be improved and the associated
economic benefit is significant. We illustrate the
improvement in accuracy and associated economic benefit
by instantiating the model and the filter. We further
elaborate on how the model parameters may be estimated
and used in practice.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Raschid:2014:E,
author = {Louiqa Raschid},
title = {Editorial},
journal = j-JDIQ,
volume = {4},
number = {4},
pages = {14:1--14:??},
month = may,
year = {2014},
CODEN = {????},
DOI = {https://doi.org/10.1145/2579167},
ISSN = {1936-1955},
bibdate = {Tue May 27 16:54:25 MDT 2014},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {14},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Wijnhoven:2014:VBF,
author = {Fons Wijnhoven and Chintan Amrit and Pim Dietz},
title = {Value-Based File Retention: File Attributes as File
Value and Information Waste Indicators},
journal = j-JDIQ,
volume = {4},
number = {4},
pages = {15:1--15:??},
month = may,
year = {2014},
CODEN = {????},
DOI = {https://doi.org/10.1145/2567656},
ISSN = {1936-1955},
bibdate = {Tue May 27 16:54:25 MDT 2014},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {Several file retention policy methods propose that a
file retention policy should be based on file value.
Though such a retention policy might increase the value
of accessible files, the method to arrive at such a
policy is under-researched. This article discusses how
one can arrive at a method for developing file
retention policies based on the use values of files.
The method's applicability is initially assessed
through a case study at Capgemini, Netherlands. In the
case study, we hypothesize that one can develop a file
retention policy by testing causal relations between
file attributes (as used by file retention methods) and
the use value of files. Unfortunately, most file
attributes used by file retention methods have a weak
correlation with file value, resulting in the
conclusion that these methods do not well select out
high- and low-value files. This would imply the
ineffectiveness of the used attributes in our study or
errors in our conceptualization of file value. We
continue with the last possibility and develop
indicators for file utility (with low utility being
waste). With this approach we were able to detect waste
files, in a sample of files, with an accuracy of 80\%.
We therefore not only suggest further research in
information waste detection as part of a file retention
policy, but also to further explore other file
attributes that could better predict file value and
file utility.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {15},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Fan:2014:IBR,
author = {Wenfei Fan and Shuai Ma and Nan Tang and Wenyuan Yu},
title = {Interaction between Record Matching and Data
Repairing},
journal = j-JDIQ,
volume = {4},
number = {4},
pages = {16:1--16:??},
month = may,
year = {2014},
CODEN = {????},
DOI = {https://doi.org/10.1145/2567657},
ISSN = {1936-1955},
bibdate = {Tue May 27 16:54:25 MDT 2014},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {Central to a data cleaning system are record matching
and data repairing. Matching aims to identify tuples
that refer to the same real-world object, and repairing
is to make a database consistent by fixing errors in
the data by using integrity constraints. These are
typically treated as separate processes in current data
cleaning systems, based on heuristic solutions. This
article studies a new problem in connection with data
cleaning, namely the interaction between record
matching and data repairing. We show that repairing can
effectively help us identify matches, and vice versa.
To capture the interaction, we provide a uniform
framework that seamlessly unifies repairing and
matching operations to clean a database based on
integrity constraints, matching rules, and master data.
We give a full treatment of fundamental problems
associated with data cleaning via matching and
repairing, including the static analyses of constraints
and rules taken together, and the complexity,
termination, and determinism analyses of data cleaning.
We show that these problems are hard, ranging from
NP-complete or coNP-complete, to PSPACE-complete.
Nevertheless, we propose efficient algorithms to clean
data via both matching and repairing. The algorithms
find deterministic fixes and reliable fixes based on
confidence and entropy analyses, respectively, which
are more accurate than fixes generated by heuristics.
Heuristic fixes are produced only when deterministic or
reliable fixes are unavailable. We experimentally
verify that our techniques can significantly improve
the accuracy of record matching and data repairing that
are taken as separate processes, using real-life and
synthetic data.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {16},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Martin:2014:MAE,
author = {Nigel Martin and Alexandra Poulovassilis and Jianing
Wang},
title = {A Methodology and Architecture Embedding Quality
Assessment in Data Integration},
journal = j-JDIQ,
volume = {4},
number = {4},
pages = {17:1--17:??},
month = may,
year = {2014},
CODEN = {????},
DOI = {https://doi.org/10.1145/2567663},
ISSN = {1936-1955},
bibdate = {Tue May 27 16:54:25 MDT 2014},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
abstract = {Data integration aims to combine heterogeneous
information sources and to provide interfaces for
accessing the integrated resource. Data integration is
a collaborative task that may involve many people with
different degrees of experience, knowledge of the
application domain, and expectations relating to the
integrated resource. It may be difficult to determine
and control the quality of an integrated resource due
to these factors. In this article, we propose a data
integration methodology that has embedded within it
iterative quality assessment and improvement of the
integrated resource. We also propose an architecture
for the realisation of this methodology. The quality
assessment is based on an ontology representation of
different users' quality requirements and of the main
elements of the integrated resource. We use description
logic as the formal basis for reasoning about users'
quality requirements and for validating that an
integrated resource satisfies these requirements. We
define quality factors and associated metrics which
enable the quality of alternative global schemas for an
integrated resource to be assessed quantitatively, and
hence the improvement which results from the refinement
of a global schema following our methodology to be
measured. We evaluate our approach through a
large-scale real-life case study in biological data
integration in which an integrated resource is
constructed from three autonomous proteomics data
sources.},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {17},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Naumann:2014:E,
author = {Felix Naumann},
title = {Editorial},
journal = j-JDIQ,
volume = {5},
number = {1--2},
pages = {1:1--1:??},
month = aug,
year = {2014},
CODEN = {????},
DOI = {https://doi.org/10.1145/2648781},
ISSN = {1936-1955},
bibdate = {Mon Sep 8 08:45:58 MDT 2014},
bibsource = {http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
acknowledgement = ack-nhfb,
ajournal = {J. Data Inf. Qual.},
articleno = {1},
fjournal = {Journal of Data and Information Quality (JDIQ)},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
}
@Article{Talburt:2014:IQR,
author = "John Talburt and Therese L. Williams and Thomas C.
Redman and David Becker",
title = "Information Quality Research Challenge: Predicting and
Quantifying the Impact of Social Issues on Information
Quality Programs",
journal = j-JDIQ,
volume = "5",
number = "1--2",
pages = "2:1--2:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629603",
ISSN = "1936-1955",
bibdate = "Mon Sep 8 08:45:58 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Rahm:2014:DPC,
author = "Erhard Rahm",
title = "Discovering Product Counterfeits in Online Shops: a
Big Data Integration Challenge",
journal = j-JDIQ,
volume = "5",
number = "1--2",
pages = "3:1--3:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629605",
ISSN = "1936-1955",
bibdate = "Mon Sep 8 08:45:58 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Christen:2014:CPP,
author = "Peter Christen and Dinusha Vatsalan and Vassilios S.
Verykios",
title = "Challenges for Privacy Preservation in Data
Integration",
journal = j-JDIQ,
volume = "5",
number = "1--2",
pages = "4:1--4:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629604",
ISSN = "1936-1955",
bibdate = "Mon Sep 8 08:45:58 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Techniques for integrating data from diverse sources
have attracted significant interest in recent years.
Much of today's data collected by businesses and
governments are about people, and integrating such data
across organizations can raise privacy concerns.
Various techniques that preserve privacy during data
integration have been developed, but several challenges
persist that need to be solved before such techniques
become useful in practical applications. We elaborate
on these challenges and discuss research directions.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Vogel:2014:RGA,
author = "Tobias Vogel and Arvid Heise and Uwe Draisbach and
Dustin Lange and Felix Naumann",
title = "Reach for Gold: an Annealing Standard to Evaluate
Duplicate Detection Results",
journal = j-JDIQ,
volume = "5",
number = "1--2",
pages = "5:1--5:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629687",
ISSN = "1936-1955",
bibdate = "Mon Sep 8 08:45:58 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Duplicates in a database are one of the prime causes
of poor data quality and are at the same time among the
most difficult data quality problems to alleviate. To
detect and remove such duplicates, many commercial and
academic products and methods have been developed. The
evaluation of such systems is usually in need of
pre-classified results. Such gold standards are often
expensive to come by (much manual classification is
necessary), not representative (too small or too
synthetic), and proprietary and thus preclude
repetition (company-internal data). This lament has
been uttered in many papers and even more paper
reviews. The proposed annealing standard is a
structured set of duplicate detection results, some of
which are manually verified and some of which are
merely validated by many classifiers. As more and more
classifiers are evaluated against the annealing
standard, more and more results are verified and
validation becomes more and more confident. We formally
define gold, silver, and the annealing standard and
their maintenance. Experiments show how quickly an
annealing standard converges to a gold standard.
Finally, we provide an annealing standard for 750,000
CDs to the duplicate detection community.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Fan:2014:CRD,
author = "Wenfei Fan and Floris Geerts and Nan Tang and Wenyuan
Yu",
title = "Conflict Resolution with Data Currency and
Consistency",
journal = j-JDIQ,
volume = "5",
number = "1--2",
pages = "6:1--6:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2631923",
ISSN = "1936-1955",
bibdate = "Mon Sep 8 08:45:58 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "This article introduces a new approach for conflict
resolution: given a set of tuples pertaining to the
same entity, it identifies a single tuple in which each
attribute has the latest and consistent value in the
set. This problem is important in data integration,
data cleaning, and query answering. It is, however,
challenging since in practice, reliable time stamps are
often absent, among other things. We propose a model
for conflict resolution by specifying data currency in
terms of partial currency orders and currency
constraints and by enforcing data consistency with
constant conditional functional dependencies. We show
that identifying data currency orders helps us repair
inconsistent data, and vice versa. We investigate a
number of fundamental problems associated with conflict
resolution and establish their complexity. In addition,
we introduce a framework and develop algorithms for
conflict resolution by integrating data currency and
consistency inferences into a single process and by
interacting with users. We experimentally verify the
accuracy and efficiency of our methods using real-life
and synthetic data.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Glowalla:2014:PDD,
author = "Paul Glowalla and Ali Sunyaev",
title = "Process-Driven Data Quality Management: a Critical
Review on the Application of Process Modeling
Languages",
journal = j-JDIQ,
volume = "5",
number = "1--2",
pages = "7:1--7:??",
month = aug,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2629568",
ISSN = "1936-1955",
bibdate = "Mon Sep 8 08:45:58 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Data quality is critical to organizational success. In
order to improve and sustain data quality in the long
term, process-driven data quality management (PDDQM)
seeks to redesign processes that create or modify data.
Consequently, process modeling is mandatory for PDDQM.
Current research examines process modeling languages
with respect to representational capabilities. However,
there is a gap, since process modeling languages for
PDDQM are not considered. We address this research gap
by providing a synthesis of the varying applications of
process modeling languages for PDDQM. We conducted a
keyword-based literature review in conferences as well
as 74 high-ranked information systems and computer
science journals, reviewing 1,555 articles from 1995
onwards. For practitioners, it is possible to integrate
the quality perspective within broadly applied process
models. For further research, we derive
representational requirements for PDDQM that should be
integrated within existing process modeling languages.
However, there is a need for further representational
analysis to examine the adequacy of upcoming process
modeling languages. New or enhanced process modeling
languages may substitute for PDDQM-specific process
modeling languages and facilitate development of a
broadly applicable and accepted process modeling
language for PDDQM.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Belhajjame:2015:E,
author = "Khalid Belhajjame and Domenico Beneventano and Laure
Berti-Equille and James Cheney and Victor Cuevas and
Tom {De Nies} and Helena Galhardas and Ashish Gehani
and Boris Glavic and Paul Groth and Olaf Hartig and
Scott Jensen and Andrea Maurino and Gianni Mecca and
Renee Miller and Luc Moreau and Mourad Ouzzani and
Jaehong Park",
title = "Editorial",
journal = j-JDIQ,
volume = "5",
number = "3",
pages = "8:1--8:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2692312",
ISSN = "1936-1955",
bibdate = "Tue Mar 3 14:42:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Cheah:2015:PQA,
author = "You-Wei Cheah and Beth Plale",
title = "Provenance Quality Assessment Methodology and
Framework",
journal = j-JDIQ,
volume = "5",
number = "3",
pages = "9:1--9:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2665069",
ISSN = "1936-1955",
bibdate = "Tue Mar 3 14:42:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Data provenance, a form of metadata describing the
life cycle of a data product, is crucial in the sharing
of research data. Research data, when shared over
decades, requires recipients to make a determination of
both use and trust. That is, can they use the data?
More importantly, can they trust it? Knowing the data
are of high quality is one factor to establishing
fitness for use and trust. Provenance can be used to
assert the quality of the data, but the quality of the
provenance must be known as well. We propose a
framework for assessing the quality of data provenance.
We identify quality issues in data provenance,
establish key quality dimensions, and define a
framework of analysis. We apply the analysis framework
to synthetic and real-world provenance.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Herschel:2015:HAA,
author = "Melanie Herschel",
title = "A Hybrid Approach to Answering Why-Not Questions on
Relational Query Results",
journal = j-JDIQ,
volume = "5",
number = "3",
pages = "10:1--10:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2665070",
ISSN = "1936-1955",
bibdate = "Tue Mar 3 14:42:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "In analyzing and debugging data transformations, or
more specifically relational queries, a subproblem is
to understand why some data are not part of the query
result. This problem has recently been addressed from
different perspectives for various fragments of
relational queries. The different perspectives yield
different yet complementary explanations of such
missing answers. This article first aims at unifying
the different approaches by defining a new type of
explanation, called hybrid explanation, that
encompasses the variety of previously defined types of
explanations. This solution goes beyond simply forming
the union of explanations produced by different
algorithms and is shown to be able to explain a larger
set of missing answers. Second, we present Conseil, an
algorithm to generate hybrid explanations. Conseil is
also the first algorithm to handle nonmonotonic
queries. Experiments on efficiency and explanation
quality show that Conseil is comparable and even
outperforms previous algorithms. This article extends a
previous short conference paper by providing proofs,
additional theorems, and a detailed discussion of each
step of the Conseil algorithm. It also significantly
extends the experimental evaluation on efficiency and
explanation quality.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Chong:2015:SID,
author = "Stephen Chong and Christian Skalka and Jeffrey A.
Vaughan",
title = "Self-Identifying Data for Fair Use",
journal = j-JDIQ,
volume = "5",
number = "3",
pages = "11:1--11:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2687422",
ISSN = "1936-1955",
bibdate = "Tue Mar 3 14:42:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Public-use earth science datasets are a useful
resource with the unfortunate feature that their
provenance is easily disconnected from their content.
``Fair-use policies'' typically associated with these
datasets require appropriate attribution of providers
by users, but sound and complete attribution is
difficult if provenance information is lost. To address
this, we introduce a technique to directly associate
provenance information with sensor datasets. Our
technique is similar to traditional watermarking but is
intended for application to unstructured time-series
datasets. Our approach is potentially imperceptible
given sufficient margins of error in datasets and is
robust to a number of benign but likely transformations
including truncation, rounding, bit-flipping, sampling,
and reordering. We provide algorithms for both one-bit
and blind mark checking and show how our system can be
adapted to various data representation types. Our
algorithms are probabilistic in nature and are
characterized by both combinatorial and empirical
analyses. Mark embedding can be applied at any point in
the data life cycle, allowing adaptation of our scheme
to social or scientific concerns.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Baillie:2015:QPA,
author = "Chris Baillie and Peter Edwards and Edoardo Pignotti",
title = "{QUAL}: a Provenance-Aware Quality Model",
journal = j-JDIQ,
volume = "5",
number = "3",
pages = "12:1--12:??",
month = feb,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700413",
ISSN = "1936-1955",
bibdate = "Tue Mar 3 14:42:39 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "In this article, we present a model for quality
assessment over linked data. This model has been
designed to align with emerging standards for
provenance on the Web to enable agents to reason about
data provenance when performing quality assessment. The
model also enables quality assessment provenance to be
represented, thus allowing agents to make decisions
about reuse of existing assessments. We also discuss
the development of an OWL ontology as part of a
software framework to support reasoning about data
quality and assessment reuse. Finally, we evaluate this
framework using two real-world case studies derived
from transport and invasive-species monitoring
applications.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Attenberg:2015:BMC,
author = "Joshua Attenberg and Panos Ipeirotis and Foster
Provost",
title = "Beat the Machine: Challenging Humans to Find a
Predictive Model's ``Unknown Unknowns''",
journal = j-JDIQ,
volume = "6",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2700832",
ISSN = "1936-1955",
bibdate = "Thu Mar 5 07:53:50 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "We present techniques for gathering data that expose
errors of automatic predictive models. In certain
common settings, traditional methods for evaluating
predictive models tend to miss rare but important
errors --- most importantly, cases for which the model
is confident of its prediction (but wrong). In this
article, we present a system that, in a game-like
setting, asks humans to identify cases that will cause
the predictive model-based system to fail. Such
techniques are valuable in discovering problematic
cases that may not reveal themselves during the normal
operation of the system and may include cases that are
rare but catastrophic. We describe the design of the
system, including design iterations that did not quite
work. In particular, the system incentivizes humans to
provide examples that are difficult for the model to
handle by providing a reward proportional to the
magnitude of the predictive model's error. The humans
are asked to ``Beat the Machine'' and find cases where
the automatic model (``the Machine'') is wrong.
Experiments show that the humans using Beat the Machine
identify more errors than do traditional techniques for
discovering errors in predictive models, and, indeed,
they identify many more errors where the machine is
(wrongly) confident it is correct. Furthermore, those
cases the humans identify seem to be not simply
outliers, but coherent areas missed completely by the
model. Beat the Machine identifies the ``unknown
unknowns.'' Beat the Machine has been deployed at an
industrial scale by several companies. The main impact
has been that firms are changing their perspective on
and practice of evaluating predictive models. ``There
are known knowns. These are things we know that we
know. There are known unknowns. That is to say, there
are things that we know we don't know. But there are
also unknown unknowns. There are things we don't know
we don't know.'' --- Donald Rumsfeld",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "1",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Alonso:2015:CLQ,
author = "Omar Alonso",
title = "Challenges with Label Quality for Supervised
Learning",
journal = j-JDIQ,
volume = "6",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2724721",
ISSN = "1936-1955",
bibdate = "Thu Mar 5 07:53:50 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Organizations that develop and use technologies around
information retrieval, machine learning, recommender
systems, and natural language processing depend on
labels for engineering and experimentation. These
labels, usually gathered via human computation, are
used in machine-learned models for prediction and
evaluation purposes. In such scenarios, collecting
high-quality labels is a very important part of the
overall process. We elaborate on these challenges and
discuss research directions.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Lukyanenko:2015:IQR,
author = "Roman Lukyanenko and Jeffrey Parsons",
title = "Information Quality Research Challenge: Adapting
Information Quality Principles to User-Generated
Content",
journal = j-JDIQ,
volume = "6",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2723166",
ISSN = "1936-1955",
bibdate = "Thu Mar 5 07:53:50 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Naumann:2015:E,
author = "Felix Naumann",
title = "Editorial",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "4:1--4:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2762716",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Varshney:2015:DCD,
author = "Kush R. Varshney and Dennis Wei and Karthikeyan
Natesan Ramamurthy and Aleksandra Mojsilovi{\'c}",
title = "Data Challenges in Disease Response: The 2014 {Ebola}
Outbreak and Beyond",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "5:1--5:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2742550",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Barnaghi:2015:CQD,
author = "Payam Barnaghi and Maria Bermudez-Edo and Ralf
T{\"o}njes",
title = "Challenges for Quality of Data in Smart Cities",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "6:1--6:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2747881",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Grant:2015:CLT,
author = "Christan Earl Grant and Daisy Zhe Wang",
title = "A Challenge for Long-Term Knowledge Base Maintenance",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "7:1--7:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2738044",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Sha:2015:DQC,
author = "Kewei Sha and Sherali Zeadally",
title = "Data Quality Challenges in Cyber-Physical Systems",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "8:1--8:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2740965",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Gennari:2015:CQT,
author = "Rosella Gennari and Sara Tonelli and Pierpaolo
Vittorini",
title = "Challenges in Quality of Temporal Data --- Starting
with Gold Standards",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "9:1--9:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2736699",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Basole:2015:DAC,
author = "Rahul C. Basole and Mark L. Braunstein and Jimeng
Sun",
title = "Data and Analytics Challenges for a Learning
Healthcare System",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "10:1--10:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2755489",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Todoran:2015:MEI,
author = "Ion-George Todoran and Laurent Lecornu and Ali
Khenchaf and Jean-Marc {Le Caillec}",
title = "A Methodology to Evaluate Important Dimensions of
Information Quality in Systems",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "11:1--11:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2744205",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Assessing the quality of the information proposed by
an information system has become one of the major
research topics in the last two decades. A quick
literature survey shows that a significant number of
information quality frameworks are proposed in
different domains of application: management
information systems, web information systems,
information fusion systems, and so forth.
Unfortunately, they do not provide a feasible
methodology that is both simple and intuitive to be
implemented in practice. In order to address this need,
we present in this article a new information quality
methodology. Our methodology makes use of existing
frameworks and proposes a three-step process capable of
tracking the quality changes through the system. In the
first step and as a novelty compared to existing
studies, we propose decomposing the information system
into its elementary modules. Having access to each
module allows us to locally define the information
quality. Then, in the second step, we model each
processing module by a quality transfer function,
capturing the module's influence over the information
quality. In the third step, we make use of the previous
two steps in order to estimate the quality of the
entire information system. Thus, our methodology allows
informing the end-user on both output quality and local
quality. The proof of concept of our methodology has
been carried out considering two applications: an
automatic target recognition system and a diagnosis
coding support system.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Zarraga-Rodriguez:2015:EID,
author = "Marta Zarraga-Rodriguez and M. Jesus Alvarez",
title = "Experience: Information Dimensions Affecting
Employees' Perceptions Towards Being Well Informed",
journal = j-JDIQ,
volume = "6",
number = "2--3",
pages = "12:1--12:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2774223",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Information is a strategic company resource, but there
is no consensus in the literature regarding the set of
dimensions to be considered when measuring the quality
of the information. Most measures of information
quality depend on user perception. Using multiple
correlation analysis, we obtain a model that allows us
to explain how information quality dimensions influence
information consumers' overall feeling of being well
informed. A set of dimensions that any measure of
information quality should at least include is
proposed. This exploratory study reports the results of
a research survey among managers of companies committed
to quality management within the framework of a Total
Quality Management (TQM) model, which is an
information-intensive management model.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Bartoli:2015:DQC,
author = "Alberto Bartoli and Andrea {De Lorenzo} and Eric
Medvet and Fabiano Tarlao",
title = "Data Quality Challenge: Toward a Tool for String
Processing by Examples",
journal = j-JDIQ,
volume = "6",
number = "4",
pages = "13:1--13:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2786983",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ahlers:2015:DCQ,
author = "Dirk Ahlers and John Krogstie",
title = "Document and Corpus Quality Challenges for Knowledge
Management in Engineering Enterprises",
journal = j-JDIQ,
volume = "6",
number = "4",
pages = "14:1--14:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2818379",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ramadan:2015:DSN,
author = "Banda Ramadan and Peter Christen and Huizhi Liang and
Ross W. Gayler",
title = "Dynamic Sorted Neighborhood Indexing for Real-Time
Entity Resolution",
journal = j-JDIQ,
volume = "6",
number = "4",
pages = "15:1--15:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2816821",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Real-time Entity Resolution (ER) is the process of
matching query records in subsecond time with records
in a database that represent the same real-world
entity. Indexing techniques are generally used to
efficiently extract a set of candidate records from the
database that are similar to a query record, and that
are to be compared with the query record in more
detail. The sorted neighborhood indexing method, which
sorts a database and compares records within a sliding
window, has been successfully used for ER of large
static databases. However, because it is based on
static sorted arrays and is designed for batch ER that
resolves all records in a database rather than
resolving those relating to a single query record, this
technique is not suitable for real-time ER on dynamic
databases that are constantly updated. We propose a
tree-based technique that facilitates dynamic indexing
based on the sorted neighborhood method, which can be
used for real-time ER, and investigate both static and
adaptive window approaches. We propose an approach to
reduce query matching times by precalculating the
similarities between attribute values stored in
neighboring tree nodes. We also propose a multitree
solution where different sorting keys are used to
reduce the effects of errors and variations in
attribute values on matching quality by building
several distinct index trees. We experimentally
evaluate our proposed techniques on large real
datasets, as well as on synthetic data with different
data quality characteristics. Our results show that as
the index grows, no appreciable increase occurs in both
record insertion and query times, and that using
multiple trees gives noticeable improvements on
matching quality with only a small increase in query
time. Compared to earlier indexing techniques for
real-time ER, our approach achieves significantly
reduced indexing and query matching times while
maintaining high matching accuracy.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Coletti:2015:DCH,
author = "Paolo Coletti and Maurizio Murgia",
title = "Design and Construction of a Historical Financial
Database of the {Italian} Stock Market 1973--2011",
journal = j-JDIQ,
volume = "6",
number = "4",
pages = "16:1--16:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2822898",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "This article presents the technical aspects of
designing and building a historical database of the
Italian Stock Market. The database contains daily
market data from 1973 to 2011 and is constructed by
merging two main digital sources and several other
hand-collected data sources. We analyzed and developed
semiautomatic tools to deal with problems related to
time-series matchings, quality of data, and numerical
errors. We also developed a concatenation structure to
allow the handling of company name changes, mergers,
and spin-offs without artificially altering numerical
series. At the same time, we maintained the
transparency of the historical information on each
individual company listed. Thanks to the overlapping of
digital and hand-collected data, the completed database
has a very high level of detail and accuracy. The
dataset is particularly suited for any empirical
research in financial economics and for more
practically oriented numerical applications and
forecasting simulations.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Missier:2015:CSI,
author = "Paolo Missier",
title = "Corrigendum to the Special Issue Editorial in {JDIQ}
Volume 5, Issue 3",
journal = j-JDIQ,
volume = "6",
number = "4",
pages = "17:1--17:??",
month = oct,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2821019",
ISSN = "1936-1955",
bibdate = "Tue Oct 27 22:10:29 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "17",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Chapman:2016:CQD,
author = "Adriane P. Chapman and Arnon Rosenthal and Len
Seligman",
title = "The Challenge of ``Quick and Dirty'' Information
Quality",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "1:1--1:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2834123",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "1",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Millar:2016:DQC,
author = "Jeremy R. Millar and Douglas D. Hodson and Gilbert L.
Peterson and Darryl K. Ahner",
title = "Data Quality Challenges in Distributed
Live-Virtual-Constructive Test Environments",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "2:1--2:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2850420",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Lukyanenko:2016:IQR,
author = "Roman Lukyanenko",
title = "Information Quality Research Challenge: Information
Quality in the Age of Ubiquitous Digital
Intermediation",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "3:1--3:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2856038",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "As information technology becomes an integral part of
daily life, increasingly, people understand the world
around them by turning to digital sources as opposed to
directly interacting with objects in the physical
world. This has ushered in the age of Ubiquitous
Digital Intermediation (UDI). With the explosion of
UDI, the scope of Information Quality (IQ) research is
due to expand dramatically as the challenge becomes to
capture the wealth and nuances of human experience.
This article presents three key changes to the IQ
landscape brought about by UDI, including expansion of
the scope of traditional IQ dimensions, digital to
physical mapping challenge, and the increased need to
manage content authenticity. UDI generates many novel
questions and opportunities for the IQ research
community.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Zhu:2016:DSC,
author = "Hongwei Zhu and Yang W. Lee and Arnon S. Rosenthal",
title = "Data Standards Challenges for Interoperable and
Quality Data",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "4:1--4:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2903723",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ulbricht:2016:CCD,
author = "Robert Ulbricht and Hilko Donker and Claudio Hartmann
and Martin Hahmann and Wolfgang Lehner",
title = "Challenges for Context-Driven Time Series
Forecasting",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "5:1--5:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2896822",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Predicting time series is a crucial task for
organizations, since decisions are often based on
uncertain information. Many forecasting models are
designed from a generic statistical point of view.
However, each real-world application requires
domain-specific adaptations to obtain high-quality
results. All such specifics are summarized by the term
of context. In contrast to current approaches, we want
to integrate context as the primary driver in the
forecasting process. We introduce context-driven time
series forecasting focusing on two exemplary domains:
renewable energy and sparse sales data. In view of
this, we discuss the challenge of context integration
in the individual process steps.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ceolin:2016:CUR,
author = "Davide Ceolin and Paul Groth and Valentina Maccatrozzo
and Wan Fokkink and Willem Robert {Van Hage} and
Archana Nottamkandath",
title = "Combining User Reputation and Provenance Analysis for
Trust Assessment",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "6:1--6:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2818382",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Trust is a broad concept that in many systems is often
reduced to user reputation alone. However, user
reputation is just one way to determine trust. The
estimation of trust can be tackled from other
perspectives as well, including by looking at
provenance. Here, we present a complete pipeline for
estimating the trustworthiness of artifacts given their
provenance and a set of sample evaluations. The
pipeline is composed of a series of algorithms for (1)
extracting relevant provenance features, (2) generating
stereotypes of user behavior from provenance features,
(3) estimating the reputation of both stereotypes and
users, (4) using a combination of user and stereotype
reputations to estimate the trustworthiness of
artifacts, and (5) selecting sets of artifacts to
trust. These algorithms rely on the W3C PROV
recommendations for provenance and on evidential
reasoning by means of subjective logic. We evaluate the
pipeline over two tagging datasets: tags and
evaluations from the Netherlands Institute for Sound
and Vision's Waisda? video tagging platform, as well as
crowdsourced annotations from the Steve.Museum project.
The approach achieves up to 85\% precision when
predicting tag trustworthiness. Perhaps more
importantly, the pipeline provides satisfactory results
using relatively little evidence through the use of
provenance.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Christen:2016:ADA,
author = "Peter Christen and Ross W. Gayler and Khoi-Nguyen Tran
and Jeffrey Fisher and Dinusha Vatsalan",
title = "Automatic Discovery of Abnormal Values in Large
Textual Databases",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "7:1--7:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2889311",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Textual databases are ubiquitous in many application
domains. Examples of textual data range from names and
addresses of customers to social media posts and
bibliographic records. With online services,
individuals are increasingly required to enter their
personal details for example when purchasing products
online or registering for government services, while
many social network and e-commerce sites allow users to
post short comments. Many online sites leave open the
possibility for people to enter unintended or malicious
abnormal values, such as names with errors, bogus
values, profane comments, or random character
sequences. In other applications, such as online
bibliographic databases or comparative online shopping
sites, databases are increasingly populated in (semi-)
automatic ways through Web crawls. This practice can
result in low quality data being added automatically
into a database. In this article, we develop three
techniques to automatically discover abnormal
(unexpected or unusual) values in large textual
databases. Following recent work in categorical outlier
detection, our assumption is that ``normal'' values are
those that occur frequently in a database, while an
individual abnormal value is rare. Our techniques are
unsupervised and address the challenge of discovering
abnormal values as an outlier detection problem. Our
first technique is a basic but efficient q-gram set
based technique, the second is based on a probabilistic
language model, and the third employs morphological
word features to train a one-class support vector
machine classifier. Our aim is to investigate and
develop techniques that are fast, efficient, and
automatic. The output of our techniques can help in the
development of rule-based data cleaning and information
extraction systems, or be used as training data for
further supervised data cleaning procedures. We
evaluate our techniques on four large real-world
datasets from different domains: two US voter
registration databases containing personal details, the
2013 KDD Cup dataset of bibliographic records, and the
SNAP Memetracker dataset of phrases from social
networking sites. Our results show that our techniques
can efficiently and automatically discover abnormal
textual values, allowing an organization to conduct
efficient data exploration, and improve the quality of
their textual databases without the need of requiring
explicit training data.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Aiken:2016:ESD,
author = "Peter Aiken",
title = "{EXPERIENCE}: Succeeding at Data Management---{BigCo}
Attempts to Leverage Data",
journal = j-JDIQ,
volume = "7",
number = "1--2",
pages = "8:1--8:??",
month = jun,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2893482",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "In a manner similar to most organizations, BigCompany
(BigCo) was determined to benefit strategically from
its widely recognized and vast quantities of data.
(U.S. government agencies make regular visits to BigCo
to learn from its experiences in this area.) When faced
with an explosion in data volume, increases in
complexity, and a need to respond to changing
conditions, BigCo struggled to respond using a
traditional, information technology (IT) project-based
approach to address these challenges. As BigCo was not
data knowledgeable, it did not realize that traditional
approaches could not work. Two full years into the
initiative, BigCo was far from achieving its initial
goals. How much more time, money, and effort would be
required before results were achieved? Moreover, could
the results be achieved in time to support a larger,
critical, technology-driven challenge that also
depended on solving the data challenges? While these
questions remain unaddressed, these considerations
increase our collective understanding of data assets as
separate from IT projects. Only by reconceiving data as
a strategic asset can organizations begin to address
these new challenges. Transformation to a data-driven
culture requires far more than technology, which
remains just one of three required ``stool legs''
(people and process being the other two). Seven
prerequisites to effectively leveraging data are
necessary, but insufficient awareness exists in most
organizations---hence, the widespread misfires in these
areas, especially when attempting to implement the
so-called big data initiatives. Refocusing on
foundational data management practices is required for
all organizations, regardless of their organizational
or data strategies.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Chiang:2016:UDC,
author = "Fei Chiang and Siddharth Sitaramachandran",
title = "Unifying Data and Constraint Repairs",
journal = j-JDIQ,
volume = "7",
number = "3",
pages = "9:1--9:??",
month = sep,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2883616",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Integrity constraints play an important role in data
design. However, in an operational database, they may
not be enforced for many reasons. Hence, over time,
data may become inconsistent with respect to the
constraints. To manage this, several approaches have
proposed techniques to repair the data by finding
minimal or lowest cost changes to the data that make it
consistent with the constraints. Such techniques are
appropriate for applications where only the data
changes, but schemas and their constraints remain
fixed. In many modern applications, however,
constraints may evolve over time as application or
business rules change, as data are integrated with new
data sources or as the underlying semantics of the data
evolves. In such settings, when an inconsistency
occurs, it is no longer clear if there is an error in
the data (and the data should be repaired) or if the
constraints have evolved (and the constraints should be
repaired). In this work, we present a novel unified
cost model that allows data and constraint repairs to
be compared on an equal footing. We consider repairs
over a database that is inconsistent with respect to a
set of rules, modeled as functional dependencies (FDs).
FDs are the most common type of constraint and are
known to play an important role in maintaining data
quality. We propose modifications to the data and to
the FDs such that the data and the constraints are
better aligned. We evaluate the quality and scalability
of our repair algorithms over synthetic and real
datasets. The results show that our repair algorithms
not only scale well for large datasets but also are
able to accurately capture and correct inconsistencies
and accurately decide when a data repair versus a
constraint repair is best.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Maltese:2016:SAC,
author = "Vincenzo Maltese and Fausto Giunchiglia",
title = "Search and Analytics Challenges in Digital Libraries
and Archives",
journal = j-JDIQ,
volume = "7",
number = "3",
pages = "10:1--10:??",
month = sep,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2939377",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Gelernter:2016:COE,
author = "J. Gelernter and J. Jha",
title = "Challenges in Ontology Evaluation",
journal = j-JDIQ,
volume = "7",
number = "3",
pages = "11:1--11:??",
month = sep,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2935751",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Berti-Equille:2016:VBD,
author = "Laure Berti-Equille and Mouhamadou Lamine Ba",
title = "Veracity of Big Data: Challenges of Cross-Modal Truth
Discovery",
journal = j-JDIQ,
volume = "7",
number = "3",
pages = "12:1--12:??",
month = sep,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2935753",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Haralabopoulos:2016:CIC,
author = "Giannis Haralabopoulos and Ioannis Anagnostopoulos and
Sherali Zeadally",
title = "The Challenge of Improving Credibility of
User-Generated Content in Online Social Networks",
journal = j-JDIQ,
volume = "7",
number = "3",
pages = "13:1--13:??",
month = sep,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2899003",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "In every environment of information exchange,
Information Quality (IQ) is considered one of the most
important issues. Studies in Online Social Networks
(OSNs) analyze a number of related subjects that span
both theoretical and practical aspects, from data
quality identification and simple attribute
classification to quality assessment models for various
social environments. Among several factors that affect
information quality in online social networks is the
credibility of user-generated content. To address this
challenge, some proposed solutions include
community-based evaluation and labeling of
user-generated content in terms of accuracy, clarity,
and timeliness, along with well-established real-time
data mining techniques.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{DUrso:2016:EGD,
author = "Ciro D'Urso",
title = "{EXPERIENCE}: Glitches in Databases, How to Ensure
Data Quality by Outlier Detection Techniques",
journal = j-JDIQ,
volume = "7",
number = "3",
pages = "14:1--14:??",
month = sep,
year = "2016",
CODEN = "????",
DOI = "https://doi.org/10.1145/2950109",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:26 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Enterprise's archives are inevitably affected by the
presence of data quality problems (also called
glitches). This article proposes the application of a
new method to analyze the quality of datasets stored in
the tables of a database, with no knowledge of the
semantics of the data and without the need to define
repositories of rules. The proposed method is based on
proper revisions of different approaches for outlier
detection that are combined to boost overall
performance and accuracy. A novel transformation
algorithm is conceived that treats the items in
database tables as data points in real coordinate space
of n dimensions, so that fields containing dates and
fields containing text are processed to calculate
distances between those data points. The implementation
of an iterative approach ensures that global and local
outliers are discovered even if they are subject,
primarily in datasets with multiple outliers or
clusters of outliers, to masking and swamping effects.
The application of the method to a set of archives,
some of which have been studied extensively in the
literature, provides very promising experimental
results and outperforms the application of a single
other technique. Finally, a list of future research
directions is highlighted.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Labouseur:2017:IDD,
author = "Alan G. Labouseur and Carolyn C. Matheus",
title = "An Introduction to Dynamic Data Quality Challenges",
journal = j-JDIQ,
volume = "8",
number = "2",
pages = "6:1--6:??",
month = feb,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2998575",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:27 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Becker:2017:CTD,
author = "Christoph Becker and Kresimir Duretec and Andreas
Rauber",
title = "The Challenge of Test Data Quality in Data
Processing",
journal = j-JDIQ,
volume = "8",
number = "2",
pages = "7:1--7:??",
month = feb,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012004",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:27 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ferro:2017:RCI,
author = "Nicola Ferro",
title = "Reproducibility Challenges in Information Retrieval
Evaluation",
journal = j-JDIQ,
volume = "8",
number = "2",
pages = "8:1--8:??",
month = feb,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3020206",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:27 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Shankaranarayanan:2017:CCE,
author = "G. Shankaranarayanan and Roger Blake",
title = "From Content to Context: The Evolution and Growth of
Data Quality Research",
journal = j-JDIQ,
volume = "8",
number = "2",
pages = "9:1--9:??",
month = feb,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/2996198",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:27 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Research in data and information quality has made
significant strides over the last 20 years. It has
become a unified body of knowledge incorporating
techniques, methods, and applications from a variety of
disciplines including information systems, computer
science, operations management, organizational
behavior, psychology, and statistics. With
organizations viewing ``Big Data'', social media data,
data-driven decision-making, and analytics as critical,
data quality has never been more important. We believe
that data quality research is reaching the threshold of
significant growth and a metamorphosis from focusing on
measuring and assessing data quality---content---toward a
focus on usage and context. At this stage, it is vital
to understand the identity of this research area in
order to recognize its current state and to effectively
identify an increasing number of research opportunities
within. Using Latent Semantic Analysis (LSA) to analyze
the abstracts of 972 peer-reviewed journal and
conference articles published over the past 20 years,
this article contributes by identifying the core topics
and themes that define the identity of data quality
research. It further explores their trends over time,
pointing to the data quality dimensions that have---and
have not---been well-studied, and offering insights into
topics that may provide significant opportunities in
this area.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Goldberg:2017:PIS,
author = "Sean Goldberg and Daisy Zhe Wang and Christan Grant",
title = "A Probabilistically Integrated System for
Crowd-Assisted Text Labeling and Extraction",
journal = j-JDIQ,
volume = "8",
number = "2",
pages = "10:1--10:??",
month = feb,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3012003",
ISSN = "1936-1955",
bibdate = "Sat Apr 8 09:38:27 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "The amount of text data has been growing exponentially
in recent years, giving rise to automatic information
extraction methods that store text annotations in a
database. The current state-of-the-art structured
prediction methods, however, are likely to contain
errors and it is important to be able to manage the
overall uncertainty of the database. On the other hand,
the advent of crowdsourcing has enabled humans to aid
machine algorithms at scale. In this article, we
introduce pi-CASTLE, a system that optimizes and
integrates human and machine computing as applied to a
complex structured prediction problem involving
Conditional Random Fields (CRFs). We propose strategies
grounded in information theory to select a token
subset, formulate questions for the crowd to label, and
integrate these labelings back into the database using
a method of constrained inference. On both a text
segmentation task over academic citations and a named
entity recognition task over tweets we show an order of
magnitude improvement in accuracy gain over baseline
methods.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Woodall:2017:DRC,
author = "Philip Woodall",
title = "The Data Repurposing Challenge: New Pressures from
Data Analytics",
journal = j-JDIQ,
volume = "8",
number = "3--4",
pages = "11:1--11:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3022698",
ISSN = "1936-1955",
bibdate = "Mon Oct 2 09:44:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Markovic:2017:CQS,
author = "Milan Markovic and Peter Edwards",
title = "The Challenge of Quality in Social Computation",
journal = j-JDIQ,
volume = "8",
number = "3--4",
pages = "12:1--12:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3041762",
ISSN = "1936-1955",
bibdate = "Mon Oct 2 09:44:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Al-Hussaini:2017:EIB,
author = "Leena Al-Hussaini",
title = "Experience: Insights into the Benchmarking Data of
{Hunspell} and {Aspell} Spell Checkers",
journal = j-JDIQ,
volume = "8",
number = "3--4",
pages = "13:1--13:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3092700",
ISSN = "1936-1955",
bibdate = "Mon Oct 2 09:44:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib;
https://www.math.utah.edu/pub/tex/bib/spell.bib",
abstract = "Hunspell is a morphological spell checker and
automatic corrector for Macintosh 10.6 and later
versions. Aspell is a general spell checker and
automatic corrector for the GNU operating system. In
this experience article, we present a benchmarking
study of the performance of Hunspell and Aspell. Ginger
is a general grammatical spell checker that is used as
a baseline to compare the performance of Hunspell and
Aspell. A benchmark dataset was carefully selected to
be a mixture of different error types at different word
length levels. Further, the benchmarking data are from
very bad spellers and will challenge any spell checker.
The extensive study described in this work will
characterize the respective softwares and benchmarking
data from multiple perspectives and will consider many
error statistics. Overall, Hunspell can correct 415/469
words and Aspell can correct 414/469 words. The
baseline Ginger can correct 279/469 words. We recommend
this dataset as the preferred benchmark dataset for
evaluating newly developed ``isolated word'' spell
checkers.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Abdellaoui:2017:QSD,
author = "Sabrina Abdellaoui and Fahima Nader and Rachid
Chalal",
title = "{QDflows}: a System Driven by Knowledge Bases for
Designing Quality-Aware Data flows",
journal = j-JDIQ,
volume = "8",
number = "3--4",
pages = "14:1--14:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3064173",
ISSN = "1936-1955",
bibdate = "Mon Oct 2 09:44:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "In the big data era, data integration is becoming
increasingly important. It is usually handled by data
flows processes that extract, transform, and clean data
from several sources, and populate the data integration
system (DIS). Designing data flows is facing several
challenges. In this article, we deal with data quality
issues such as (1) specifying a set of quality rules,
(2) enforcing them on the data flow pipeline to detect
violations, and (3) producing accurate repairs for the
detected violations. We propose QDflows, a system for
designing quality-aware data flows that considers the
following as input: (1) a high-quality knowledge base
(KB) as the global schema of integration, (2) a set of
data sources and a set of validated users'
requirements, (3) a set of defined mappings between
data sources and the KB, and (4) a set of quality rules
specified by users. QDflows uses an ontology to design
the DIS schema. It offers the ability to define the DIS
ontology as a module of the knowledge base, based on
validated users' requirements. The DIS ontology model
is then extended with multiple types of quality rules
specified by users. QDflows extracts and transforms
data from sources to populate the DIS. It detects
violations of quality rules enforced on the data flows,
constructs repair patterns, searches for horizontal and
vertical matches in the knowledge base, and performs an
automatic repair when possible or generates possible
repairs. It interactively involves users to validate
the repair process before loading the clean data into
the DIS. Using real-life and synthetic datasets, the
DBpedia and Yago knowledge bases, we experimentally
evaluate the generality, effectiveness, and efficiency
of QDflows. We also showcase an interactive tool
implementing our system.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{St-Maurice:2017:ECS,
author = "Justin St-Maurice and Catherine Burns",
title = "An Exploratory Case Study to Understand Primary Care
Users and Their Data Quality Tradeoffs",
journal = j-JDIQ,
volume = "8",
number = "3--4",
pages = "15:1--15:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3058750",
ISSN = "1936-1955",
bibdate = "Mon Oct 2 09:44:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Primary care data is an important part of the evolving
healthcare ecosystem. Generally, users in primary care
are expected to provide excellent patient care and
record high-quality data. In practice, users must
balance sets of priorities regarding care and data. The
goal of this study was to understand data quality
tradeoffs between timeliness, validity, completeness,
and use among primary care users. As a case study, data
quality measures and metrics are developed through a
focus group session with managers. After calculating
and extracting measurements of data quality from six
years of historic data, each measure was modeled with
logit binomial regression to show correlations,
characterize tradeoffs, and investigate data quality
interactions. Measures and correlations for
completeness, use, and timeliness were calculated for
196,967 patient encounters. Based on the analysis,
there was a positive relationship between validity and
completeness, and a negative relationship between
timeliness and use. Use of data and reductions in entry
delay were positively associated with completeness and
validity. Our results suggest that if users are not
provided with sufficient time to record data as part of
their regular workflow, they will prioritize spending
available time with patients. As a measurement of a
primary care system's effectiveness, the negative
correlation between use and timeliness points to a
self-reinforcing relationship that provides users with
little external value. In the future, additional data
can be generated from comparable organizations to test
several new hypotheses about primary care users.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Wang:2017:DDR,
author = "Jiannan Wang and Nan Tang",
title = "Dependable Data Repairing with Fixing Rules",
journal = j-JDIQ,
volume = "8",
number = "3--4",
pages = "16:1--16:??",
month = jul,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3041761",
ISSN = "1936-1955",
bibdate = "Mon Oct 2 09:44:30 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/jdqi/;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "One of the main challenges that data-cleaning systems
face is to automatically identify and repair data
errors in a dependable manner. Though data dependencies
(also known as integrity constraints) have been widely
studied to capture errors in data, automated and
dependable data repairing on these errors has remained
a notoriously difficult problem. In this work, we
introduce an automated approach for dependably
repairing data errors, based on a novel class of fixing
rules. A fixing rule contains an evidence pattern, a
set of negative patterns, and a fact value. The heart
of fixing rules is deterministic: given a tuple, the
evidence pattern and the negative patterns of a fixing
rule are combined to precisely capture which attribute
is wrong, and the fact indicates how to correct this
error. We study several fundamental problems associated
with fixing rules and establish their complexity. We
develop efficient algorithms to check whether a set of
fixing rules are consistent and discuss approaches to
resolve inconsistent fixing rules. We also devise
efficient algorithms for repairing data errors using
fixing rules. Moreover, we discuss approaches on how to
generate a large number of fixing rules from examples
or available knowledge bases. We experimentally
demonstrate that our techniques outperform other
automated algorithms in terms of the accuracy of
repairing data errors, using both real-life and
synthetic data.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Marcheggiani:2017:ELQ,
author = "Diego Marcheggiani and Fabrizio Sebastiani",
title = "On the Effects of Low-Quality Training Data on
Information Extraction from Clinical Reports",
journal = j-JDIQ,
volume = "9",
number = "1",
pages = "1:1--1:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3106235",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:56 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "In the last five years there has been a flurry of work
on information extraction from clinical documents, that
is, on algorithms capable of extracting, from the
informal and unstructured texts that are generated
during everyday clinical practice, mentions of concepts
relevant to such practice. Many of these research works
are about methods based on supervised learning, that
is, methods for training an information extraction
system from manually annotated examples. While a lot of
work has been devoted to devising learning methods that
generate more and more accurate information extractors,
no work has been devoted to investigating the effect of
the quality of training data on the learning process
for the clinical domain. Low quality in training data
often derives from the fact that the person who has
annotated the data is different from the one against
whose judgment the automatically annotated data must be
evaluated. In this article, we test the impact of such
data quality issues on the accuracy of information
extraction systems as applied to the clinical domain.
We do this by comparing the accuracy deriving from
training data annotated by the authoritative coder
(i.e., the one who has also annotated the test data and
by whose judgment we must abide) with the accuracy
deriving from training data annotated by a different
coder, equally expert in the subject matter. The
results indicate that, although the disagreement
between the two coders (as measured on the training
set) is substantial, the difference is (surprisingly
enough) not always statistically significant. While the
dataset used in the present work originated in a
clinical context, the issues we study in this work are
of more general interest.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "1",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Basheer:2017:CBQ,
author = "Aseel Basheer and Kewei Sha",
title = "Cluster-Based Quality-Aware Adaptive Data Compression
for Streaming Data",
journal = j-JDIQ,
volume = "9",
number = "1",
pages = "2:1--2:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3122863",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:56 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Wireless sensor networks (WSNs) are widely applied in
data collection applications. Energy efficiency is one
of the most important design goals of WSNs. In this
article, we examine the tradeoffs between the energy
efficiency and the data quality. First, four attributes
used to evaluate data quality are formally defined.
Then, we propose a novel data compression algorithm,
Quality-Aware Adaptive data Compression (QAAC), to
reduce the amount of data communication to save energy.
QAAC utilizes an adaptive clustering algorithm to build
clusters from dataset; then a code for each cluster is
generated and stored in a Huffman encoding tree. The
encoding algorithm encodes the original dataset based
on the Huffman encoding tree. An improvement algorithm
is also designed to reduce the information loss when
data are compressed. After the encoded data, the
Huffman encoding tree and parameters used in the
improvement algorithm have been received at the sink, a
decompression algorithm is used to retrieve the
approximation of the original dataset. The performance
evaluation shows that QAAC is efficient and achieves a
much higher compression ratio than lossy and lossless
compression algorithms, while it has much smaller
information loss than lossy compression algorithms.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Corsar:2017:COD,
author = "David Corsar and Peter Edwards",
title = "Challenges of Open Data Quality: More Than Just
License, Format, and Customer Support",
journal = j-JDIQ,
volume = "9",
number = "1",
pages = "3:1--3:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3110291",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:56 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{El-Mawass:2017:DQC,
author = "Nour El-Mawass and Saad Alaboodi",
title = "Data Quality Challenges in Social Spam Research",
journal = j-JDIQ,
volume = "9",
number = "1",
pages = "4:1--4:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3090057",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:56 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Chen:2017:IQC,
author = "Min Chen and Roman Lukyanenko and Monica Chiarini
Tremblay",
title = "Information Quality Challenges in Shared Healthcare
Decision Making",
journal = j-JDIQ,
volume = "9",
number = "1",
pages = "5:1--5:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3090056",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:56 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Arbuckle:2017:CPC,
author = "Peter Arbuckle and Ezra Kahn and Adam Kriesberg",
title = "Challenge Paper: Challenges to Sharing Data and Models
for Life Cycle Assessment",
journal = j-JDIQ,
volume = "9",
number = "1",
pages = "6:1--6:??",
month = oct,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.1145/3106236",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:56 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Raschid:2018:ECJ,
author = "Louiqa Raschid",
title = "{Editor-in-Chief (January 2014--May 2017)} Farewell
Report",
journal = j-JDIQ,
volume = "9",
number = "2",
pages = "7:1--7:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3143313",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:57 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Catarci:2018:FNJ,
author = "Tiziana Catarci",
title = "Foreword from the New {JDIQ Editor-in-Chief}",
journal = j-JDIQ,
volume = "9",
number = "2",
pages = "8:1--8:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3143316",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:57 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Truong:2018:CEQ,
author = "Hong-Linh Truong and Aitor Murguzur and Erica Yang",
title = "Challenges in Enabling Quality of Analytics in the
Cloud",
journal = j-JDIQ,
volume = "9",
number = "2",
pages = "9:1--9:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3138806",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:57 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Koh:2018:ELA,
author = "Kyu Han Koh and Eric Fouh and Mohammed F. Farghally
and Hossameldin Shahin and Clifford A. Shaffer",
title = "Experience: Learner Analytics Data Quality for an
{eTextbook} System",
journal = j-JDIQ,
volume = "9",
number = "2",
pages = "10:1--10:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3148240",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:57 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "We present lessons learned related to data collection
and analysis from 5 years of experience with the
eTextbook system OpenDSA. The use of such cyberlearning
systems is expanding rapidly in both formal and
informal educational settings. Although the precise
issues related to any such project are idiosyncratic
based on the data collection technology and goals of
the project, certain types of data collection problems
will be common. We begin by describing the nature of
the data transmitted between the student's client
machine and the database server, and our initial
database schema for storing interaction log data. We
describe many problems that we encountered, with the
nature of the problems categorized as syntactic-level
data collection issues, issues with relating events to
users, or issues with tracking users over time.
Relating events to users and tracking the time spent on
tasks are both prerequisites to converting
syntactic-level interaction streams to semantic-level
behavior needed for higher-order analysis of the data.
Finally, we describe changes made to our database
schema that helped to resolve many of the issues that
we had encountered. These changes help advance our
ultimate goal of encouraging a change from ineffective
learning behavior by students to more productive
behavior.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Cappiello:2018:VDQ,
author = "C. Cappiello and C. Cerletti and C. Fratto and B.
Pernici",
title = "Validating Data Quality Actions in Scoring Processes",
journal = j-JDIQ,
volume = "9",
number = "2",
pages = "11:1--11:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3141248",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:57 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Data quality has gained momentum among organizations
upon the realization that poor data quality might cause
failures and/or inefficiencies, thus compromising
business processes and application results. However,
enterprises often adopt data quality assessment and
improvement methods based on practical and empirical
approaches without conducting a rigorous analysis of
the data quality issues and outcome of the enacted data
quality improvement practices. In particular, data
quality management, especially the identification of
the data quality dimensions to be monitored and
improved, is performed by knowledge workers on the
basis of their skills and experience. Control methods
are therefore designed on the basis of expected and
evident quality problems; thus, these methods may not
be effective in dealing with unknown and/or unexpected
problems. This article aims to provide a methodology,
based on fault injection, for validating the data
quality actions used by organizations. We show how it
is possible to check whether the adopted techniques
properly monitor the real issues that may damage
business processes. At this stage, we focus on scoring
processes, i.e., those in which the output represents
the evaluation or ranking of a specific object. We show
the effectiveness of our proposal by means of a case
study in the financial risk management area.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Heinrich:2018:RDQ,
author = "Bernd Heinrich and Diana Hristova and Mathias Klier
and Alexander Schiller and Michael Szubartowicz",
title = "Requirements for Data Quality Metrics",
journal = j-JDIQ,
volume = "9",
number = "2",
pages = "12:1--12:??",
month = jan,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3148238",
ISSN = "1936-1955",
bibdate = "Mon Jan 22 16:07:57 MST 2018",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Data quality and especially the assessment of data
quality have been intensively discussed in research and
practice alike. To support an economically oriented
management of data quality and decision making under
uncertainty, it is essential to assess the data quality
level by means of well-founded metrics. However, if not
adequately defined, these metrics can lead to wrong
decisions and economic losses. Therefore, based on a
decision-oriented framework, we present a set of five
requirements for data quality metrics. These
requirements are relevant for a metric that aims to
support an economically oriented management of data
quality and decision making under uncertainty. We
further demonstrate the applicability and efficacy of
these requirements by evaluating five data quality
metrics for different data quality dimensions.
Moreover, we discuss practical implications when
applying the presented requirements.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Geerts:2018:ESI,
author = "Floris Geerts and Paolo Missier and Norman Paton",
title = "Editorial: Special Issue on Improving the Veracity and
Value of Big Data",
journal = j-JDIQ,
volume = "9",
number = "3",
pages = "13:1--13:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3174791",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Bertossi:2018:OMD,
author = "Leopoldo Bertossi and Mostafa Milani",
title = "Ontological Multidimensional Data Models and
Contextual Data Quality",
journal = j-JDIQ,
volume = "9",
number = "3",
pages = "14:1--14:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3148239",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Data quality assessment and data cleaning are
context-dependent activities. Motivated by this
observation, we propose the Ontological
Multidimensional Data Model (OMD model), which can be
used to model and represent contexts as logic-based
ontologies. The data under assessment are mapped into
the context for additional analysis, processing, and
quality data extraction. The resulting contexts allow
for the representation of dimensions, and
multidimensional data quality assessment becomes
possible. At the core of a multidimensional context, we
include a generalized multidimensional data model and a
Datalog$^\pm $ ontology with provably good properties
in terms of query answering. These main components are
used to represent dimension hierarchies, dimensional
constraints, and dimensional rules and define
predicates for quality data specification. Query
answering relies on and triggers navigation through
dimension hierarchies and becomes the basic tool for
the extraction of quality data. The OMD model is
interesting per se beyond applications to data quality.
It allows for a logic-based and computationally
tractable representation of multidimensional data,
extending previous multidimensional data models with
additional expressive power and functionalities.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Mountantonakis:2018:SMM,
author = "Michalis Mountantonakis and Yannis Tzitzikas",
title = "Scalable Methods for Measuring the Connectivity and
Quality of Large Numbers of Linked Datasets",
journal = j-JDIQ,
volume = "9",
number = "3",
pages = "15:1--15:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3165713",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Although the ultimate objective of Linked Data is
linking and integration, it is not currently evident
how connected the current Linked Open Data (LOD) cloud
is. In this article, we focus on methods, supported by
special indexes and algorithms, for performing
measurements related to the connectivity of more than
two datasets that are useful in various tasks including
(a) Dataset Discovery and Selection; (b) Object
Coreference, i.e., for obtaining complete information
about a set of entities, including provenance
information; (c) Data Quality Assessment and
Improvement, i.e., for assessing the connectivity
between any set of datasets and monitoring their
evolution over time, as well as for estimating data
veracity; (d) Dataset Visualizations; and various other
tasks. Since it would be prohibitively expensive to
perform all these measurements in a na{\"\i}ve way, in
this article, we introduce indexes (and their
construction algorithms) that can speed up such tasks.
In brief, we introduce (i) a namespace-based prefix
index, (ii) a sameAs catalog for computing the
symmetric and transitive closure of the owl:sameAs
relationships encountered in the datasets, (iii) a
semantics-aware element index (that exploits the
aforementioned indexes), and, finally, (iv) two
lattice-based incremental algorithms for speeding up
the computation of the intersection of URIs of any set
of datasets. For enhancing scalability, we propose
parallel index construction algorithms and parallel
lattice-based incremental algorithms, we evaluate the
achieved speedup using either a single machine or a
cluster of machines, and we provide insights regarding
the factors that affect efficiency. Finally, we report
measurements about the connectivity of the (billion
triples-sized) LOD cloud that have never been carried
out so far.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Esteves:2018:TVA,
author = "Diego Esteves and Anisa Rula and Aniketh Janardhan
Reddy and Jens Lehmann",
title = "Toward Veracity Assessment in {RDF} Knowledge Bases:
an Exploratory Analysis",
journal = j-JDIQ,
volume = "9",
number = "3",
pages = "16:1--16:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177873",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Among different characteristics of knowledge bases,
data quality is one of the most relevant to maximize
the benefits of the provided information. Knowledge
base quality assessment poses a number of big data
challenges such as high volume, variety, velocity, and
veracity. In this article, we focus on answering
questions related to the assessment of the veracity of
facts through Deep Fact Validation (DeFacto), a triple
validation framework designed to assess facts in RDF
knowledge bases. Despite current developments in the
research area, the underlying framework faces many
challenges. This article pinpoints and discusses these
issues and conducts a thorough analysis of its
pipeline, aiming at reducing the error propagation
through its components. Furthermore, we discuss recent
developments related to this fact validation as well as
describing advantages and drawbacks of state-of-the-art
models. As a result of this exploratory analysis, we
give insights and directions toward a better
architecture to tackle the complex task of
fact-checking in knowledge bases.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Chen:2018:CAS,
author = "Qingyu Chen and Yu Wan and Xiuzhen Zhang and Yang Lei
and Justin Zobel and Karin Verspoor",
title = "Comparative Analysis of Sequence Clustering Methods
for Deduplication of Biological Databases",
journal = j-JDIQ,
volume = "9",
number = "3",
pages = "17:1--17:??",
month = mar,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3131611",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "The massive volumes of data in biological sequence
databases provide a remarkable resource for large-scale
biological studies. However, the underlying data
quality of these resources is a critical concern. A
particular challenge is duplication, in which multiple
records have similar sequences, creating a high level
of redundancy that impacts database storage, curation,
and search. Biological database deduplication has two
direct applications: for database curation, where
detected duplicates are removed to improve curation
efficiency, and for database search, where detected
duplicate sequences may be flagged but remain available
to support analysis. Clustering methods have been
widely applied to biological sequences for database
deduplication. Since an exhaustive all-by-all pairwise
comparison of sequences cannot scale for a high volume
of data, heuristic approaches have been recruited, such
as the use of simple similarity thresholds. In this
article, we present a comparison between CD-HIT and
UCLUST, the two best-known clustering tools for
sequence database deduplication. Our contributions
include a detailed assessment of the redundancy
remaining after deduplication, application of standard
clustering evaluation metrics to quantify the cohesion
and separation of the clusters generated by each
method, and a biological case study that assesses
intracluster function annotation consistency to
demonstrate the impact of these factors on a practical
application of the sequence clustering methods. Our
results show that the trade-off between efficiency and
accuracy becomes acute when low threshold values are
used and when cluster sizes are large. This evaluation
leads to practical recommendations for users for more
effective uses of the sequence clustering tools for
deduplication.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "17",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Gal:2018:CPD,
author = "Avigdor Gal and Arik Senderovich and Matthias
Weidlich",
title = "Challenge Paper: Data Quality Issues in Queue Mining",
journal = j-JDIQ,
volume = "9",
number = "4",
pages = "18:1--18:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3165712",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "18",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Musyaffa:2018:EOF,
author = "Fathoni A. Musyaffa and Christiane Engels and
Maria-Esther Vidal and Fabrizio Orlandi and S{\"o}ren
Auer",
title = "Experience: Open Fiscal Datasets, Common Issues, and
Recommendations",
journal = j-JDIQ,
volume = "9",
number = "4",
pages = "19:1--19:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3190576",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Public administrations are continuously publishing
open data, increasing the amount of government open
data over time. The published data includes budgets and
spending as part of fiscal data; publishing these data
is an important part of transparent and accountable
governance. However, open fiscal data should also meet
open data publication guidelines. When requirements in
data guidelines are not met, effective data analysis
over published datasets cannot be performed
effectively. In this article, we present Open Fiscal
Data Publication (OFDP), a framework to assess the
quality of open fiscal datasets. We also present an
extensive open fiscal data assessment and common data
quality issues found; additionally, open fiscal data
publishing guidelines are presented. We studied and
surveyed main quality factors for open fiscal datasets.
Moreover, the collected quality factors have been
scored according to the results of a questionnaire to
score quality factors within the OFDP assessment
framework. We gather and comprehensively analyze a
representative set of 77 fiscal datasets from several
public administrations across different regions at
different levels (e.g., supranational, national,
municipality). We characterize quality issues commonly
arising in these datasets. Our assessment shows that
there are many quality factors in fiscal data
publication that still need to be taken care of so that
the data can be analyzed effectively. Our proposed
guidelines allow for publishing open fiscal data where
these quality issues are avoided.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "19",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Alshayeb:2018:SSP,
author = "Mohammad Alshayeb and Yasser Shaaban and Jarallah
Al-Ghamdi",
title = "{SPMDL}: Software Product Metrics Definition
Language",
journal = j-JDIQ,
volume = "9",
number = "4",
pages = "20:1--20:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3185049",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Software metrics are becoming more acceptable measures
for software quality assessment. However, there is no
standard form to represent metric definitions, which
would be useful for metrics exchange and customization.
In this article, we propose the Software Product
Metrics Definition Language (SPMDL). We develop an
XML-based description language to define software
metrics in a precise and reusable form. Metric
definitions in SPMDL are based on meta-models extracted
from either source code or design artifacts, such as
the Dagstuhl Middle Meta-model, with support for
various abstraction levels. The language defines
several flexible computation mechanisms, such as
extended Object Constraint Language queries and
predefined graph operations on the meta-model. SPMDL
provides an unambiguous description of the metric
definition; it is also easy to use and is extensible.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "20",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ashish:2018:MRB,
author = "Naveen Ashish and Arihant Patawari",
title = "Machine Reading of Biomedical Data Dictionaries",
journal = j-JDIQ,
volume = "9",
number = "4",
pages = "21:1--21:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3177874",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "This article describes an approach for the automated
reading of biomedical data dictionaries. Automated
reading is the process of extracting element details
for each of the data elements from a data dictionary in
a document format (such as PDF) to a completely
structured representation. A structured representation
is essential if the data dictionary metadata are to be
used in applications such as data integration and also
in evaluating the quality of the associated data. We
present an approach and implemented solution for the
problem, considering different formats of data
dictionaries. We have a particular focus on the most
challenging format with a machine-learning
classification solution to the problem using
conditional random field classifiers. We present an
evaluation using several actual data dictionaries,
demonstrating the effectiveness of our approach.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "21",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Chiang:2018:IPS,
author = "Fei Chiang and Dhruv Gairola",
title = "{InfoClean}: Protecting Sensitive Information in Data
Cleaning",
journal = j-JDIQ,
volume = "9",
number = "4",
pages = "22:1--22:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3190577",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:58 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Data quality has become a pervasive challenge for
organizations as they wrangle with large, heterogeneous
datasets to extract value. Given the proliferation of
sensitive and confidential information, it is crucial
to consider data privacy concerns during the data
cleaning process. For example, in medical database
applications, varying levels of privacy are enforced
across the attribute values. Attributes such as a
patient's country or city of residence may be less
sensitive than the patient's prescribed medication.
Traditional data cleaning techniques assume the data is
openly accessible, without considering the differing
levels of information sensitivity. In this work, we
take the first steps toward a data cleaning model that
integrates privacy as part of the data cleaning
process. We present a privacy-aware data cleaning
framework that differentiates the information content
among the attribute values during the data cleaning
process to resolve data inconsistencies while
minimizing the amount of information disclosed. Our
data repair algorithm includes a set of data disclosure
operations that considers the information content of
the underlying attribute values, while maximizing data
utility. Our evaluation using real datasets shows that
our algorithm scales well, and achieves improved
performance and comparable repair accuracy against
existing data cleaning solutions.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "22",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Bertino:2018:ACE,
author = "Elisa Bertino and Mohammad R. Jahanshahi",
title = "Adaptive and Cost-Effective Collection of High-Quality
Data for Critical Infrastructure and Emergency
Management in Smart Cities---Framework and Challenges",
journal = j-JDIQ,
volume = "10",
number = "1",
pages = "1:1--1:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3190579",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "1",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Flores:2018:IQA,
author = "Javier Flores and Jun Sun",
title = "Information Quality Awareness and Information Quality
Practice",
journal = j-JDIQ,
volume = "10",
number = "1",
pages = "2:1--2:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3182182",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Healthcare organizations increasingly rely on
electronic information to optimize their operations.
Information of high diversity from various sources
accentuate the relevance and importance of information
quality (IQ). The quality of information needs to be
improved to support a more efficient and reliable
utilization of healthcare information systems (IS).
This can only be achieved through the implementation of
initiatives followed by most users across an
organization. The purpose of this study is to examine
how awareness of IS users about IQ issues would affect
their IQ behavior. Based on multiple theoretical
frameworks, it is hypothesized that different aspects
of user motivation mediate the relationship between the
awareness on both beneficial and problematic situations
and IQ practice inclination. In addition, social
influence and facilitating condition moderate the
relationship between IQ practice inclination and overt
IQ practice. The theoretical and practical implications
of findings are discussed, especially how to enhance IQ
compliance in the healthcare settings.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Bors:2018:VIC,
author = "Christian Bors and Theresia Gschwandtner and Simone
Kriglstein and Silvia Miksch and Margit Pohl",
title = "Visual Interactive Creation, Customization, and
Analysis of Data Quality Metrics",
journal = j-JDIQ,
volume = "10",
number = "1",
pages = "3:1--3:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3190578",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "During data preprocessing, analysts spend a
significant part of their time and effort profiling the
quality of the data along with cleansing and
transforming the data for further analysis. While
quality metrics---ranging from general to domain-specific
measures---support assessment of the quality of a
dataset, there are hardly any approaches to visually
support the analyst in customizing and applying such
metrics. Yet, visual approaches could facilitate users'
involvement in data quality assessment. We present
MetricDoc, an interactive environment for assessing
data quality that provides customizable, reusable
quality metrics in combination with immediate visual
feedback. Moreover, we provide an overview
visualization of these quality metrics along with error
visualizations that facilitate interactive navigation
of the data to determine the causes of quality issues
present in the data. In this article, we describe the
architecture, design, and evaluation of MetricDoc,
which underwent several design cycles, including
heuristic evaluation and expert reviews as well as a
focus group with data quality, human-computer
interaction, and visual analytics experts.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Zhang:2018:ASB,
author = "Han Zhang and Shawndra Hill and David Rothschild",
title = "Addressing Selection Bias in Event Studies with
General-Purpose Social Media Panels",
journal = j-JDIQ,
volume = "10",
number = "1",
pages = "4:1--4:??",
month = may,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3185048",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Data from Twitter have been employed in prior research
to study the impacts of events. Conventionally,
researchers use keyword-based samples of tweets to
create a panel of Twitter users who mention
event-related keywords during and after an event.
However, the keyword-based sampling is limited in its
objectivity dimension of data and information quality.
First, the technique suffers from selection bias since
users who discuss an event are already more likely to
discuss event-related topics beforehand. Second, there
are no viable control groups for comparison to a
keyword-based sample of Twitter users. We propose an
alternative sampling approach to construct panels of
users defined by their geolocation. Geolocated panels
are exogenous to the keywords in users' tweets,
resulting in less selection bias than the keyword panel
method. Geolocated panels allow us to follow
within-person changes over time and enable the creation
of comparison groups. We compare different panels in
two real-world settings: response to mass shootings and
TV advertising. We first show the strength of the
selection biases of keyword panels. Then, we
empirically illustrate how geolocated panels reduce
selection biases and allow meaningful comparison groups
regarding the impact of the studied events. We are the
first to provide a clear, empirical example of how a
better panel selection design, based on an exogenous
variable such as geography, both reduces selection bias
compared to the current state of the art and increases
the value of Twitter research for studying events.
While we advocate for the use of a geolocated panel, we
also discuss its weaknesses and application scenario
seriously. This article also calls attention to the
importance of selection bias in impacting the
objectivity of social media data.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Puentes:2018:CQE,
author = "John Puentes and Pedro Merino Laso and David Brosset",
title = "The Challenge of Quality Evaluation in Fraud
Detection",
journal = j-JDIQ,
volume = "10",
number = "2",
pages = "5:1--5:??",
month = sep,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3228341",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3228341",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Bertino:2018:CAC,
author = "Elisa Bertino and Amani Abu Jabal and Seraphin Calo
and Dinesh Verma and Christopher Williams",
title = "The Challenge of Access Control Policies Quality",
journal = j-JDIQ,
volume = "10",
number = "2",
pages = "6:1--6:??",
month = sep,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3209668",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3209668",
abstract = "Access Control policies allow one to control data
sharing among multiple subjects. For high assurance
data security, it is critical that such policies be fit
for their purpose. In this paper we introduce the
notion of ``policy quality'' and elaborate on its many
dimensions, such as consistency, completeness, and
minimality. We introduce a framework supporting the
analysis of policies with respect to the introduced
quality dimensions and elaborate on research
challenges, including policy analysis for large-scale
distributed systems, assessment of policy correctness,
and analysis of policies expressed in richer policy
models.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Karanja:2018:CPT,
author = "Evanson Mwangi Karanja and Shedden Masupe and Mandu
Gasennelwe-Jeffrey",
title = "Challenge Paper: Towards Open Datasets for {Internet
of Things} Malware",
journal = j-JDIQ,
volume = "10",
number = "2",
pages = "7:1--7:??",
month = sep,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3230669",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Koumarelas:2018:EEA,
author = "Ioannis Koumarelas and Axel Kroschk and Clifford
Mosley and Felix Naumann",
title = "Experience: Enhancing Address Matching with Geocoding
and Similarity Measure Selection",
journal = j-JDIQ,
volume = "10",
number = "2",
pages = "8:1--8:??",
month = sep,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3232852",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Given a query record, record matching is the problem
of finding database records that represent the same
real-world object. In the easiest scenario, a database
record is completely identical to the query. However,
in most cases, problems do arise, for instance, as a
result of data errors or data integrated from multiple
sources or received from restrictive form fields. These
problems are usually difficult, because they require a
variety of actions, including field segmentation,
decoding of values, and similarity comparisons, each
requiring some domain knowledge. In this article, we
study the problem of matching records that contain
address information, including attributes such as
Street-address and City. To facilitate this matching
process, we propose a domain-specific procedure to,
first, enrich each record with a more complete
representation of the address information through
geocoding and reverse-geocoding and, second, to select
the best similarity measure per each address attribute
that will finally help the classifier to achieve the
best f-measure. We report on our experience in
selecting geocoding services and discovering similarity
measures for a concrete but common industry use-case.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ferro:2018:ISIa,
author = "Nicola Ferro and Norbert Fuhr and Andreas Rauber",
title = "Introduction to the Special Issue on Reproducibility
in Information Retrieval: Evaluation Campaigns,
Collections, and Analyses",
journal = j-JDIQ,
volume = "10",
number = "3",
pages = "9:1--9:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3268408",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Moffat:2018:EMU,
author = "Alistair Moffat and Falk Scholer and Ziying Yang",
title = "Estimating Measurement Uncertainty for Information
Retrieval Effectiveness Metrics",
journal = j-JDIQ,
volume = "10",
number = "3",
pages = "10:1--10:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239572",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3239572",
abstract = "One typical way of building test collections for
offline measurement of information retrieval systems is
to pool the ranked outputs of different systems down to
some chosen depth $d$ and then form relevance judgments
for those documents only. Non-pooled documents---ones
that did not appear in the top-$d$ sets of any of the
contributing systems---are then deemed to be non-relevant
for the purposes of evaluating the relative behavior of
the systems. In this article, we use RBP-derived
residuals to re-examine the reliability of that
process. By fitting the RBP parameter $ \phi $ to
maximize similarity between AP- and NDCG-induced system
rankings, on the one hand, and RBP-induced rankings, on
the other, an estimate can be made as to the potential
score uncertainty associated with those two
recall-based metrics. We then consider the effect that
residual size---as an indicator of possible measurement
uncertainty in utility-based metrics---has in connection
with recall-based metrics by computing the effect of
increasing pool sizes and examining the trends that
arise in terms of both metric score and system
separability using standard statistical tests. The
experimental results show that the confidence levels
expressed via the $p$-values generated by statistical
tests are only weakly connected to the size of the
residual and to the degree of measurement uncertainty
caused by the presence of unjudged documents.
Statistical confidence estimates are, however, largely
consistent as pooling depths are altered. We therefore
recommend that all such experimental results should
report, in addition to the outcomes of statistical
significance tests, the residual measurements generated
by a suitably matched weighted-precision metric, to
give a clear indication of measurement uncertainty that
arises due to the presence of unjudged documents in
test collections with finite pooled judgments.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Roitero:2018:RGE,
author = "Kevin Roitero and Marco Passon and Giuseppe Serra and
Stefano Mizzaro",
title = "{Reproduce}. {Generalize}. {Extend}. {On} Information
Retrieval Evaluation without Relevance Judgments",
journal = j-JDIQ,
volume = "10",
number = "3",
pages = "11:1--11:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3241064",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3241064",
abstract = "The evaluation of retrieval effectiveness by means of
test collections is a commonly used methodology in the
information retrieval field. Some researchers have
addressed the quite fascinating research question of
whether it is possible to evaluate effectiveness
completely automatically, without human relevance
assessments. Since human relevance assessment is one of
the main costs of building a test collection, both in
human time and money resources, this rather ambitious
goal would have a practical impact. In this article, we
reproduce the main results on evaluating information
retrieval systems without relevance judgments;
furthermore, we generalize such previous work to
analyze the effect of test collections, evaluation
metrics, and pool depth. We also expand the idea to
semi-automatic evaluation and estimation of topic
difficulty. Our results show that (i) previous work is
overall reproducible, although some specific results
are not; (ii) collection, metric, and pool depth impact
the automatic evaluation of systems, which is anyway
accurate in several cases; (iii) semi-automatic
evaluation is an effective methodology; and (iv)
automatic evaluation can (to some extent) be used to
predict topic difficulty.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Roitero:2018:RIE,
author = "Kevin Roitero and Michael Soprano and Andrea Brunello
and Stefano Mizzaro",
title = "Reproduce and Improve: an Evolutionary Approach to
Select a Few Good Topics for Information Retrieval
Evaluation",
journal = j-JDIQ,
volume = "10",
number = "3",
pages = "12:1--12:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239573",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3239573",
abstract = "Effectiveness evaluation of information retrieval
systems by means of a test collection is a widely used
methodology. However, it is rather expensive in terms
of resources, time, and money; therefore, many
researchers have proposed methods for a cheaper
evaluation. One particular approach, on which we focus
in this article, is to use fewer topics: in TREC-like
initiatives, usually system effectiveness is evaluated
as the average effectiveness on a set of $n$ topics
(usually, $ n = 50 $, but more than 1,000 have been also
adopted); instead of using the full set, it has been
proposed to find the best subsets of a few good topics
that evaluate the systems in the most similar way to
the full set. The computational complexity of the task
has so far limited the analysis that has been
performed. We develop a novel and efficient approach
based on a multi-objective evolutionary algorithm. The
higher efficiency of our new implementation allows us
to reproduce some notable results on topic set
reduction, as well as perform new experiments to
generalize and improve such results. We show that our
approach is able to both reproduce the main
state-of-the-art results and to allow us to analyze the
effect of the collection, metric, and pool depth used
for the evaluation. Finally, differently from previous
studies, which have been mainly theoretical, we are
also able to discuss some practical topic selection
strategies, integrating results of automatic evaluation
approaches.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Jagerman:2018:OLL,
author = "Rolf Jagerman and Krisztian Balog and Maarten {De
Rijke}",
title = "{OpenSearch}: Lessons Learned from an Online
Evaluation Campaign",
journal = j-JDIQ,
volume = "10",
number = "3",
pages = "13:1--13:??",
month = oct,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239575",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:16:59 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3239575",
abstract = "We report on our experience with TREC OpenSearch, an
online evaluation campaign that enabled researchers to
evaluate their experimental retrieval methods using
real users of a live website. Specifically, we focus on
the task of ad hoc document retrieval within the
academic search domain, and work with two search
engines, CiteSeerX and SSOAR, that provide us with
traffic. We describe our experimental platform, which
is based on the living labs methodology, and report on
the experimental results obtained. We also share our
experiences, challenges, and the lessons learned from
running this track in 2016 and 2017.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ferro:2018:ISIb,
author = "Nicola Ferro and Norbert Fuhr and Andreas Rauber",
title = "Introduction to the Special Issue on Reproducibility
in Information Retrieval: Tools and Infrastructures",
journal = j-JDIQ,
volume = "10",
number = "4",
pages = "14:1--14:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3268410",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:00 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Hopfgartner:2018:ESC,
author = "Frank Hopfgartner and Allan Hanbury and Henning
M{\"u}ller and Ivan Eggel and Krisztian Balog and
Torben Brodt and Gordon V. Cormack and Jimmy Lin and
Jayashree Kalpathy-Cramer and Noriko Kando and Makoto
P. Kato and Anastasia Krithara and Tim Gollub and
Martin Potthast and Evelyne Viegas and Simon Mercer",
title = "Evaluation-as-a-Service for the Computational
Sciences: Overview and Outlook",
journal = j-JDIQ,
volume = "10",
number = "4",
pages = "15:1--15:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239570",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:00 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "Evaluation in empirical computer science is essential
to show progress and assess technologies developed.
Several research domains such as information retrieval
have long relied on systematic evaluation to measure
progress: here, the Cranfield paradigm of creating
shared test collections, defining search tasks, and
collecting ground truth for these tasks has persisted
up until now. In recent years, however, several new
challenges have emerged that do not fit this paradigm
very well: extremely large data sets, confidential data
sets as found in the medical domain, and rapidly
changing data sets as often encountered in industry.
Crowdsourcing has also changed the way in which
industry approaches problem-solving with companies now
organizing challenges and handing out monetary awards
to incentivize people to work on their challenges,
particularly in the field of machine learning. This
article is based on discussions at a workshop on
Evaluation-as-a-Service (EaaS). EaaS is the paradigm of
not providing data sets to participants and have them
work on the data locally, but keeping the data central
and allowing access via Application Programming
Interfaces (API), Virtual Machines (VM), or other
possibilities to ship executables. The objectives of
this article are to summarize and compare the current
approaches and consolidate the experiences of these
approaches to outline the next steps of EaaS,
particularly toward sustainable research
infrastructures. The article summarizes several
existing approaches to EaaS and analyzes their usage
scenarios and also the advantages and disadvantages.
The many factors influencing EaaS are summarized, and
the environment in terms of motivations for the various
stakeholders, from funding agencies to challenge
organizers, researchers and participants, to industry
interested in supplying real-world problems for which
they require solutions. EaaS solves many problems of
the current research environment, where data sets are
often not accessible to many researchers. Executables
of published tools are equally often not available
making the reproducibility of results impossible. EaaS,
however, creates reusable/citable data sets as well as
available executables. Many challenges remain, but such
a framework for research can also foster more
collaboration between researchers, potentially
increasing the speed of obtaining research results.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Yang:2018:ARR,
author = "Peilin Yang and Hui Fang and Jimmy Lin",
title = "{Anserini}: Reproducible Ranking Baselines Using
{Lucene}",
journal = j-JDIQ,
volume = "10",
number = "4",
pages = "16:1--16:??",
month = nov,
year = "2018",
CODEN = "????",
DOI = "https://doi.org/10.1145/3239571",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:00 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
abstract = "This work tackles the perennial problem of
reproducible baselines in information retrieval
research, focusing on bag-of-words ranking models.
Although academic information retrieval researchers
have a long history of building and sharing systems,
they are primarily designed to facilitate the
publication of research papers. As such, these systems
are often incomplete, inflexible, poorly documented,
difficult to use, and slow, particularly in the context
of modern web-scale collections. Furthermore, the
growing complexity of modern software ecosystems and
the resource constraints most academic research groups
operate under make maintaining open-source systems a
constant struggle. However, except for a small number
of companies (mostly commercial web search engines)
that deploy custom infrastructure, Lucene has become
the de facto platform in industry for building search
applications. Lucene has an active developer base, a
large audience of users, and diverse capabilities to
work with heterogeneous collections at scale. However,
it lacks systematic support for ad hoc experimentation
using standard test collections. We describe Anserini,
an information retrieval toolkit built on Lucene that
fills this gap. Our goal is to simplify ad hoc
experimentation and allow researchers to easily
reproduce results with modern bag-of-words ranking
models on diverse test collections. With Anserini, we
demonstrate that Lucene provides a suitable framework
for supporting information retrieval research.
Experiments show that our system efficiently indexes
large web collections, provides modern ranking models
that are on par with research implementations in terms
of effectiveness, and supports low-latency query
evaluation to facilitate rapid experimentation.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Kiesel:2018:RWC,
  author          = {Kiesel, Johannes and Kneist, Florian and Alshomary, Milad
    and Stein, Benno and Hagen, Matthias and Potthast, Martin},
  title           = {Reproducible {Web} Corpora: Interactive Archiving with
    Automatic Quality Assessment},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {10},
  number          = {4},
  articleno       = {17},
  pages           = {17:1--17:??},
  month           = nov,
  year            = {2018},
  DOI             = {https://doi.org/10.1145/3239574},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {The evolution of web pages from static HTML pages
    toward dynamic pieces of software has rendered
    archiving them increasingly difficult. Nevertheless, an
    accurate, reproducible web archive is a necessity to
    ensure the reproducibility of web-based
    research. Archiving web pages reproducibly, however, is
    currently not part of best practices for web corpus
    construction. As a result, and despite the ongoing
    efforts of other stakeholders to archive the web, tools
    for the construction of reproducible web corpora are
    insufficient or ill-fitted. This article presents a new
    tool tailored to this purpose. It relies on emulating
    user interactions with a web page while recording all
    network traffic. The customizable user interactions can
    be replayed on demand, while requests sent by the
    archived page are served with the recorded
    responses. The tool facilitates reproducible user
    studies, user simulations, and evaluations of
    algorithms that rely on extracting data from web
    pages. To evaluate our tool, we conduct the first
    systematic assessment of reproduction quality for
    rendered web pages. Using our tool, we create a corpus
    of 10,000 web pages carefully sampled from the Common
    Crawl and manually annotated with regard to
    reproduction quality via crowdsourcing. Based on this
    data, we test three approaches to automatic
    reproduction-quality assessment. An off-the-shelf
    neural network, trained on visual differences between
    the web page during archiving and reproduction, matches
    the manual assessments best. This automatic assessment
    of reproduction quality allows for immediate bugfixing
    during archiving and continuous development of our tool
    as the web continues to evolve.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:00 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Roy:2018:CCD,
  author          = {Roy, Dwaipayan and Mitra, Mandar and Ganguly, Debasis},
  title           = {To Clean or Not to Clean: Document Preprocessing and
    Reproducibility},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {10},
  number          = {4},
  articleno       = {18},
  pages           = {18:1--18:??},
  month           = nov,
  year            = {2018},
  DOI             = {https://doi.org/10.1145/3242180},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {Web document collections such as WT10G, GOV2, and
    ClueWeb are widely used for text retrieval experiments.
    Documents in these collections contain a fair amount of
    non-content-related markup in the form of tags,
    hyperlinks, and so on. Published articles that use
    these corpora generally do not provide specific details
    about how this markup information is handled during
    indexing. However, this question turns out to be
    important: Through experiments, we find that including
    or excluding metadata in the index can produce
    significantly different results with standard IR
    models. More importantly, the effect varies across
    models and collections. For example, metadata filtering
    is found to be generally beneficial when using BM25, or
    language modeling with Dirichlet smoothing, but can
    significantly reduce retrieval effectiveness if
    language modeling is used with Jelinek-Mercer
    smoothing. We also observe that, in general, the
    performance differences become more noticeable as the
    amount of metadata in the test collections increase.
    Given this variability, we believe that the details of
    document preprocessing are significant from the point
    of view of reproducibility. In a second set of
    experiments, we also study the effect of preprocessing
    on query expansion using RM3. In this case, once again,
    we find that it is generally better to remove markup
    before using documents for query expansion.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:00 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Srivastava:2019:EHQ,
  author          = {Srivastava, Divesh and Scannapieco, Monica and
    Redman, Thomas C.},
  title           = {Ensuring High-Quality Private Data for Responsible
    Data Science: Vision and Challenges},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  month           = jan,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3287168},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3287168},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {High-quality data is critical for effective data
    science. As the use of data science has grown, so too
    have concerns that individuals' rights to privacy will
    be violated. This has led to the development of data
    protection regulations around the globe and the use of
    sophisticated anonymization techniques to protect
    privacy. Such measures make it more challenging for the
    data scientist to understand the data, exacerbating
    issues of data quality. Responsible data science aims
    to develop useful insights from the data while fully
    embracing these considerations. We pose the high-level
    problem in this article, ``How can a data scientist
    develop the needed trust that private data has high
    quality?'' We then identify a series of challenges for
    various data-centric communities and outline research
    questions for data quality and privacy researchers,
    which would need to be addressed to effectively answer
    the problem posed in this article.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:00 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Rios:2019:CTF,
  author          = {R{\'\i}os, Julio C{\'e}sar Cort{\'e}s and
    Paton, Norman W. and Fernandes, Alvaro A. A. and Abel, Edward and
    Keane, John A.},
  title           = {Crowdsourced Targeted Feedback Collection for
    Multicriteria Data Source Selection},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:??},
  month           = jan,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3284934},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3284934},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {A multicriteria data source selection (MCSS) scenario
    identifies, from a set of candidate data sources, the
    subset that best meets users' needs. These needs are
    expressed using several criteria, which are used to
    evaluate the candidate data sources. An MCSS problem
    can be solved using multidimensional optimization
    techniques that trade off the different objectives.
    Sometimes one may have uncertain knowledge regarding
    how well the candidate data sources meet the criteria.
    In order to overcome this uncertainty, one may rely on
    end-users or crowds to annotate the data items produced
    by the sources in relation to the selection criteria.
    In this article, a proposed Targeted Feedback
    Collection (TFC) approach is introduced that aims to
    identify those data items on which feedback should be
    collected, thereby providing evidence on how the
    sources satisfy the required criteria. The proposed TFC
    targets feedback by considering the confidence
    intervals around the estimated criteria values, with a
    view to increasing the confidence in the estimates that
    are most relevant to the multidimensional optimization.
    Variants of the proposed TFC approach have been
    developed for use where feedback is expected to be
    reliable (e.g., where it is provided by trusted
    experts) and where feedback is expected to be
    unreliable (e.g., from crowd workers). Both variants
    have been evaluated, and positive results are reported
    against other approaches to feedback collection,
    including active learning, in experiments that involve
    real-world datasets and crowdsourcing.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:00 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Dallachiesa:2019:ICQ,
  author          = {Dallachiesa, Michele and Aggarwal, Charu C. and
    Palpanas, Themis},
  title           = {Improving Classification Quality in Uncertain Graphs},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:??},
  month           = jan,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3242095},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3242095},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {In many real applications that use and analyze
    networked data, the links in the network graph may be
    erroneous or derived from probabilistic techniques. In
    such cases, the node classification problem can be
    challenging, since the unreliability of the links may
    affect the final results of the classification process.
    If the information about link reliability is not used
    explicitly, then the classification accuracy in the
    underlying network may be affected adversely. In this
    article, we focus on situations that require the
    analysis of the uncertainty that is present in the
    graph structure. We study the novel problem of node
    classification in uncertain graphs, by treating
    uncertainty as a first-class citizen. We propose two
    techniques based on a Bayes model and automatic
    parameter selection and show that the incorporation of
    uncertainty in the classification process as a
    first-class citizen is beneficial. We experimentally
    evaluate the proposed approach using different real
    data sets and study the behavior of the algorithms
    under different conditions. The results demonstrate the
    effectiveness and efficiency of our approach.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:00 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Casey:2019:FRR,
  author          = {Casey, K. Michael and {Casey Jr.}, Kevin},
  title           = {Financial Regulatory and Risk Management Challenges
    Stemming from Firm-Specific Digital Misinformation},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:??},
  month           = jan,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3274655},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3274655},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:00 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Fan:2019:DGC,
  author          = {Fan, Wenfei},
  title           = {Dependencies for Graphs: Challenges and
    Opportunities},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {2},
  articleno       = {5},
  pages           = {5:1--5:??},
  month           = may,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3310230},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3310230},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {What are graph dependencies? What do we need them for?
    What new challenges do they introduce? This article
    tackles these questions. It aims to incite curiosity
    and interest in this emerging area of research.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Sillaber:2019:EDI,
  author          = {Sillaber, Christian and Mussmann, Andrea and Breu, Ruth},
  title           = {Experience: Data and Information Quality Challenges in
    Governance, Risk, and Compliance Management},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {2},
  articleno       = {6},
  pages           = {6:1--6:??},
  month           = may,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3297721},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3297721},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {Governance, risk, and compliance (GRC) managers often
    struggle to document the current state of their
    organizations. This is due to the complexity of their
    IS landscape, the complex regulatory and organizational
    environment, and the frequent changes to both. GRC
    tools seek to support them by integrating existing
    information sources. However, a comprehensive analysis
    of how the data is managed in such tools, as well as
    the impact of data quality, is still missing. To build
    a basis of empirical data, we conducted a series of
    interviews with information security managers
    responsible for GRC management activities in their
    organizations. The results of a qualitative content
    analysis of these interviews suggest that decision
    makers largely depend on high-quality documentation but
    struggle to maintain their documentation at the
    required level for long periods of time. This work
    discusses factors affecting the quality of GRC data and
    information and provides insights into approaches
    implemented by organizations to analyze, improve, and
    maintain the quality of their GRC data and
    information.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Lazar:2019:EEM,
  author          = {Lazar, Alina and Jin, Ling and Spurlock, C. Anna and
    Wu, Kesheng and Sim, Alex and Todd, Annika},
  title           = {Evaluating the Effects of Missing Values and Mixed
    Data Types on Social Sequence Clustering Using {t-SNE}
    Visualization},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {2},
  articleno       = {7},
  pages           = {7:1--7:??},
  month           = may,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3301294},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3301294},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {The goal of this work is to investigate the impact of
    missing values in clustering joint categorical social
    sequences. Identifying patterns in sociodemographic
    longitudinal data is important in a number of social
    science settings. However, performing analytical
    operations, such as clustering on life course
    trajectories, is challenging due to the categorical and
    multidimensional nature of the data, their mixed data
    types, and corruption by missing and inconsistent
    values. Data quality issues were investigated
    previously on single variable sequences. To understand
    their effects on multivariate sequence analysis, we
    employ a dataset of mixed data types and missing
    values, a dissimilarity measure designed for joint
    categorical sequence data, together with dimensionality
    reduction methodologies in a systematic design of
    sequence clustering experiments. Given the categorical
    nature of our data, we employ an ``edit'' distance
    using optimal matching. Because each data record has
    multiple variables of different types, we investigate
    the impact of mixing these variables in a single
    dissimilarity measure. Between variables with binary
    values and those with multiple nominal values, we find
    that the ability to overcome missing data problems is
    more difficult in the nominal domain than in the binary
    domain. Additionally, alignment of leading missing
    values can result in systematic biases in dissimilarity
    matrices and subsequently introduce both artificial
    clusters and unrealistic interpretations of associated
    data domains. We demonstrate the usage of t-distributed
    stochastic neighborhood embedding to visually guide
    mitigation of such biases by tuning the missing value
    substitution cost parameter or determining an optimal
    sequence span.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Muller:2019:ADQ,
  author          = {M{\"u}ller, Daniel and Jain, Pratiksha and
    Te, Yieh-Funk},
  title           = {Augmenting Data Quality through High-Precision Gender
    Categorization},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {2},
  articleno       = {8},
  pages           = {8:1--8:??},
  month           = may,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3297720},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3297720},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {Mappings of first name to gender have been widely
    recognized as a critical tool for the completion,
    study, and validation of data records in a range of
    areas. In this study, we investigate how organizations
    with large databases of existing entities can create
    their own mappings between first names and gender and
    how these mappings can be improved and utilized.
    Therefore, we first explore a dataset with demographic
    information on more than 4 million people, which was
    provided by a car insurance company. Then, we study how
    naming conventions have changed over time and how they
    differ by nationality. Next, we build a probabilistic
    first-name-to-gender mapping and augment the mapping by
    adding nationality and decade of birth to improve the
    mapping's performance. We test our mapping in two-label
    and three-label settings and further validate our
    mapping by categorizing patent filings by gender of the
    inventor. We compare the results with previous studies'
    outcomes and find that our mapping produces
    high-precision results. We validate that the additional
    information of nationality and year of birth improve
    the precision scores of name-to-gender mappings.
    Therefore, the proposed approach constitutes an
    efficient process for improving the data quality of
    organizations' records, if the gender attribute is
    missing or unreliable.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Hassan:2019:ISI,
  author          = {Hassan, Naeemul and Li, Chengkai and Yang, Jun and
    Yu, Cong},
  title           = {Introduction to the Special Issue on Combating Digital
    Misinformation and Disinformation},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {3},
  articleno       = {9},
  pages           = {9:1--9:??},
  month           = jul,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3321484},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3321484},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Zannettou:2019:WFI,
  author          = {Zannettou, Savvas and Sirivianos, Michael and
    Blackburn, Jeremy and Kourtellis, Nicolas},
  title           = {The {Web} of False Information: Rumors, Fake News,
    Hoaxes, Clickbait, and Various Other Shenanigans},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {3},
  articleno       = {10},
  pages           = {10:1--10:??},
  month           = jul,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3309699},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3309699},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {A new era of Information Warfare has arrived. Various
    actors, including state-sponsored ones, are weaponizing
    information on Online Social Networks to run
    false-information campaigns with targeted manipulation
    of public opinion on specific topics. These
    false-information campaigns can have dire consequences
    to the public: mutating their opinions and actions,
    especially with respect to critical world events like
    major elections. Evidently, the problem of false
    information on the Web is a crucial one and needs
    increased public awareness as well as immediate
    attention from law enforcement agencies, public
    institutions, and in particular, the research
    community. In this article, we make a step in this
    direction by providing a typology of the Web's
    false-information ecosystem, composed of various types
    of false-information, actors, and their motives. We
    report a comprehensive overview of existing research on
    the false-information ecosystem by identifying several
    lines of work: (1) how the public perceives false
    information; (2) understanding the propagation of false
    information; (3) detecting and containing false
    information on the Web; and (4) false information on
    the political stage. In this work, we pay particular
    attention to political false information as: (1) it can
    have dire consequences to the community (e.g., when
    election results are mutated) and (2) previous work
    shows that this type of false information propagates
    faster and further when compared to other types of
    false information. Finally, for each of these lines of
    work, we report several future research directions that
    can help us better understand and mitigate the emerging
    problem of false-information dissemination on the
    Web.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Xue:2019:CAT,
  author          = {Xue, Hao and Wang, Qiaozhi and Luo, Bo and
    Seo, Hyunjin and Li, Fengjun},
  title           = {Content-Aware Trust Propagation Toward Online Review
    Spam Detection},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {3},
  articleno       = {11},
  pages           = {11:1--11:??},
  month           = jul,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3305258},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3305258},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {With the increasing popularity of online review
    systems, a large volume of user-generated content
    becomes available to help people make reasonable
    judgments about the quality of services and products
    from unknown providers. However, these platforms are
    frequently abused since fraudulent information can be
    freely inserted by potentially malicious users without
    validation. Consequently, online review systems become
    targets of individual and professional spammers, who
    insert deceptive reviews by manipulating the rating
    and/or the content of the reviews. In this work, we
    propose a review spamming detection scheme based on the
    deviation between the aspect-specific opinions
    extracted from individual reviews and the aggregated
    opinions on the corresponding aspects. In particular,
    we model the influence on the trustworthiness of the
    user due to his opinion deviations from the majority in
    the form of a deviation-based penalty, and integrate
    this penalty into a three-layer trust propagation
    framework to iteratively compute the trust scores for
    users, reviews, and review targets, respectively. The
    trust scores are effective indicators of spammers,
    since they reflect the overall deviation of a user from
    the aggregated aspect-specific opinions across all
    targets and all aspects. Experiments on the dataset
    collected from Yelp.com show that the proposed
    detection scheme based on aspect-specific content-aware
    trust propagation is able to measure users'
    trustworthiness based on opinions expressed in
    reviews.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Atanasova:2019:AFC,
author = "Pepa Atanasova and Preslav Nakov and Llu{\'\i}s
M{\`a}rquez and Alberto Barr{\'o}n-Cede{\~n}o and
Georgi Karadzhov and Tsvetomila Mihaylova and Mitra
Mohtarami and James Glass",
title = "Automatic Fact-Checking Using Context and Discourse
Information",
journal = j-JDIQ,
volume = "11",
number = "3",
pages = "12:1--12:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3297722",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3297722",
abstract = "We study the problem of automatic fact-checking,
paying special attention to the impact of contextual
and discourse information. We address two related
tasks: (i) detecting check-worthy claims and (ii)
fact-checking claims. We develop supervised systems
based on neural networks, kernel-based support vector
machines, and combinations thereof, which make use of
rich input representations in terms of discourse cues
and contextual features. For the check-worthiness
estimation task, we focus on political debates, and we
model the target claim in the context of the full
intervention of a participant and the previous and
following turns in the debate, taking into account
contextual meta information. For the fact-checking
task, we focus on answer verification in a community
forum, and we model the veracity of the answer with
respect to the entire question-answer thread in which
it occurs as well as with respect to other related
posts from the entire forum. We develop annotated
datasets for both tasks and we run extensive
experimental evaluation, confirming that both types of
information---but especially contextual features---play an
important role.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Lin:2019:DPF,
  author          = {Lin, Peng and Song, Qi and Wu, Yinghui and Pi, Jiaxing},
  title           = {Discovering Patterns for Fact Checking in Knowledge
    Graphs},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {3},
  articleno       = {13},
  pages           = {13:1--13:??},
  month           = jul,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3286488},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3286488},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {This article presents a new framework that
    incorporates graph patterns to support fact checking in
    knowledge graphs. Our method discovers discriminant
    graph patterns to construct classifiers for fact
    prediction. First, we propose a class of graph fact
    checking rules (GFCs). A GFC incorporates graph
    patterns that best distinguish true and false facts of
    generalized fact statements. We provide statistical
    measures to characterize useful patterns that are both
    discriminant and diversified. Second, we show that it
    is feasible to discover GFCs in large graphs with
    optimality guarantees. We develop an algorithm that
    performs localized search to generate a stream of graph
    patterns, and dynamically assemble the best GFCs from
    multiple GFC sets, where each set ensures quality
    scores within certain ranges. The algorithm guarantees
    a $ (1 / 2 - \epsilon) $ approximation when it (early)
    terminates. We also develop a space-efficient
    alternative that dynamically spawns prioritized
    patterns with best marginal gains to the verified GFCs.
    It guarantees a $ (1 - 1 / e) $ approximation. Both
    strategies guarantee a bounded time cost independent of
    the size of the underlying graph. Third, to support
    fact checking, we develop two classifiers, which make
    use of top-ranked GFCs as predictive rules or
    instance-level features of the pattern matches induced
    by GFCs, respectively. Using real-world data, we
    experimentally verify the efficiency and the
    effectiveness of GFC-based techniques for fact checking
    in knowledge graphs and verify its application in
    knowledge exploration and news prediction.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Borges:2019:CSF,
  author          = {Borges, Lu{\'\i}s and Martins, Bruno and
    Calado, P{\'a}vel},
  title           = {Combining Similarity Features and Deep Representation
    Learning for Stance Detection in the Context of
    Checking Fake News},
  journal         = j-JDIQ,
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  ajournal        = {J. Data Inf. Qual.},
  volume          = {11},
  number          = {3},
  articleno       = {14},
  pages           = {14:1--14:??},
  month           = jul,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3287763},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3287763},
  ISSN            = {1936-1955},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J1191},
  abstract        = {Fake news is nowadays an issue of pressing concern,
    given its recent rise as a potential threat to
    high-quality journalism and well-informed public
    discourse. The Fake News Challenge (FNC-1) was
    organized in early 2017 to encourage the development of
    machine-learning-based classification systems for
    stance detection (i.e., for identifying whether a
    particular news article agrees, disagrees, discusses,
    or is unrelated to a particular news headline), thus
    helping in the detection and analysis of possible
    instances of fake news. This article presents a novel
    approach to tackle this stance detection problem, based
    on the combination of string similarity features with a
    deep neural network architecture that leverages ideas
    previously advanced in the context of
    learning-efficient text representations, document
    classification, and natural language inference.
    Specifically, we use bi-directional Recurrent Neural
    Networks (RNNs), together with max-pooling over the
    temporal/sequential dimension and neural attention, for
    representing (i) the headline, (ii) the first two
    sentences of the news article, and (iii) the entire
    news article. These representations are then
    combined/compared, complemented with similarity
    features inspired on other FNC-1 approaches, and passed
    to a final layer that predicts the stance of the
    article toward the headline. We also explore the use of
    external sources of information, specifically large
    datasets of sentence pairs originally proposed for
    training and evaluating natural language inference
    methods to pre-train specific components of the neural
    network architecture (e.g., the RNNs used for encoding
    sentences). The obtained results attest to the
    effectiveness of the proposed ideas and show that our
    model, particularly when considering pre-training and
    the combination of neural representations together with
    similarity features, slightly outperforms the previous
    state of the art.},
  acknowledgement = ack-nhfb,
  bibdate         = {Tue Oct 22 07:17:01 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
}
@Article{Abiteboul:2019:TFD,
author = "Serge Abiteboul and Julia Stoyanovich",
title = "Transparency, Fairness, Data Protection, Neutrality:
Data Management Challenges in the Face of New
Regulation",
journal = j-JDIQ,
volume = "11",
number = "3",
pages = "15:1--15:??",
month = jul,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3310231",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3310231",
abstract = "The data revolution continues to transform every
sector of science, industry, and government. Due to the
incredible impact of data-driven technology on society,
we are becoming increasingly aware of the imperative to
use data and algorithms responsibly---in accordance with
laws and ethical norms. In this article, we discuss
three recent regulatory frameworks: the European
Union's General Data Protection Regulation (GDPR), the
New York City Automated Decisions Systems (ADS) Law,
and the Net Neutrality principle, which aim to protect
the rights of individuals who are impacted by data
collection and analysis. These frameworks are prominent
examples of a global trend: Governments are starting to
recognize the need to regulate data-driven algorithmic
technology. Our goal in this article is to bring these
regulatory frameworks to the attention of the data
management community and to underscore the technical
challenges they raise and that we, as a community, are
well-equipped to address. The main takeaway of this
article is that legal and ethical norms cannot be
incorporated into data-driven systems as an
afterthought. Rather, we must think in terms of
responsibility by design, viewing it as a systems
requirement.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Bertino:2019:DTB,
author = "Elisa Bertino and Ashish Kundu and Zehra Sura",
title = "Data Transparency with Blockchain and {AI} Ethics",
journal = j-JDIQ,
volume = "11",
number = "4",
pages = "16:1--16:??",
month = sep,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3312750",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3312750",
abstract = "Providing a 360${}^\circ $ view of a given data item
especially for sensitive data is essential toward not
only protecting the data and associated privacy but
also assuring trust, compliance, and ethics of the
systems that use or manage such data. With the advent
of General Data Protection Regulation, California Data
Privacy Law, and other such regulatory requirements, it
is essential to support data transparency in all such
dimensions. Moreover, data transparency should not
violate privacy and security requirements. In this
article, we put forward a vision for how data
transparency would be achieved in a de-centralized
fashion using blockchain technology.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Fard:2019:ARA,
author = "Amir Ebrahimi Fard and Scott Cunningham",
title = "Assessing the Readiness of Academia in the Topic of
False and Unverified Information",
journal = j-JDIQ,
volume = "11",
number = "4",
pages = "17:1--17:??",
month = sep,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3313788",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3313788",
abstract = "The spread of false and unverified information has the
potential to inflict damage by harming the reputation
of individuals or organisations, shaking financial
markets, and influencing crowd decisions in important
events. This phenomenon needs to be properly curbed,
otherwise it can contaminate other aspects of our
social life. In this regard, academia as a key
institution against false and unverified information is
expected to play a pivotal role. Despite a great deal
of research in this arena, the amount of progress by
academia is not clear yet. This can lead to
misjudgements about the performance of the topic of
interest that can ultimately result in wrong science
policies regarding academic efforts for quelling false
and unverified information. In this research, we
address this issue by assessing the readiness of
academia in the topic of false and unverified
information. To this end, we adopt the emergence
framework and measure its dimensions (novelty, growth,
coherence, and impact) over more than 21,000 articles
published by academia about false and unverified
information. Our results show the current body of
research has had organic growth so far, which is not
promising enough for confronting the problem of false
and unverified information. To tackle this problem, we
suggest an external push strategy that, compared to the
early stages of the topic of interest, reinforces the
emergence dimensions and leads to a higher level in
every dimension.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "17",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Babcock:2019:DFF,
author = "Matthew Babcock and David M. Beskow and Kathleen M.
Carley",
title = "Different Faces of False: The Spread and Curtailment
of False Information in the {Black Panther Twitter}
Discussion",
journal = j-JDIQ,
volume = "11",
number = "4",
pages = "18:1--18:??",
month = sep,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3339468",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3339468",
abstract = "The task of combating false information online appears
daunting, in part due to a public focus on how quickly
it can spread and the clamor for automated
platform-based interventions. While such concerns can
be warranted, threat analysis and intervention design
both benefit from a fuller understanding of different
types of false information and of the community
responses to them. Here, we present a study of the most
tweeted about movie ever (Black Panther) in which the
spread of false information of four different types is
compared to the ad hoc Twitter community response. We
find that (1) false information tweets played a small
part in the overall conversation, (2) community-based
debunking and shaming responses to false posts about
attacks at theaters overwhelmed such posts by orders of
magnitude, (3) as another form of community response,
one type of false narrative (Satire) was used to attack
another (Fake Attacks), and (4) the four types of
false-information tweets differed in the use of
hashtags and in the role played by originating users
and responding users. Overall, this work helps to
illustrate the importance of investigating
``on-the-ground'' community responses to fake news and
other types of digital false information and to inform
identification and intervention design and
implementation.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "18",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Bosu:2019:EQB,
author = "Michael F. Bosu and Stephen G. MacDonell",
title = "Experience: Quality Benchmarking of Datasets Used in
Software Effort Estimation",
journal = j-JDIQ,
volume = "11",
number = "4",
pages = "19:1--19:??",
month = sep,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3328746",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3328746",
abstract = "Data is a cornerstone of empirical software
engineering (ESE) research and practice. Data underpin
numerous process and project management activities,
including the estimation of development effort and the
prediction of the likely location and severity of
defects in code. Serious questions have been raised,
however, over the quality of the data used in ESE. Data
quality problems caused by noise, outliers, and
incompleteness have been noted as being especially
prevalent. Other quality issues, although also
potentially important, have received less attention. In
this study, we assess the quality of 13 datasets that
have been used extensively in research on software
effort estimation. The quality issues considered in
this article draw on a taxonomy that we published
previously based on a systematic mapping of data
quality issues in ESE. Our contributions are as
follows: (1) an evaluation of the ``fitness for
purpose'' of these commonly used datasets and (2) an
assessment of the utility of the taxonomy in terms of
dataset benchmarking. We also propose a template that
could be used to both improve the ESE data
collection/submission process and to evaluate other
such datasets, contributing to enhanced awareness of
data quality issues in the ESE community and, in time,
the availability and use of higher-quality datasets.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "19",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Ding:2019:CSA,
author = "Junhua Ding and Xinchuan Li and Xiaojun Kang and
Venkat N. Gudivada",
title = "A Case Study of the Augmentation and Evaluation of
Training Data for Deep Learning",
journal = j-JDIQ,
volume = "11",
number = "4",
pages = "20:1--20:??",
month = sep,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3317573",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3317573",
abstract = "Deep learning has been widely used for extracting
values from big data. As many other machine learning
algorithms, deep learning requires significant training
data. Experiments have shown both the volume and the
quality of training data can significantly impact the
effectiveness of the value extraction. In some cases,
the volume of training data is not sufficiently large
for effectively training a deep learning model. In
other cases, the quality of training data is not high
enough to achieve the optimal performance. Many
approaches have been proposed for augmenting training
data to mitigate the deficiency. However, whether the
augmented data are ``fit for purpose'' of deep learning
is still a question. A framework for comprehensively
evaluating the effectiveness of the augmented data for
deep learning is still not available. In this article,
we first discuss a data augmentation approach for deep
learning. The approach includes two components: the
first one is to remove noisy data in a dataset using a
machine learning based classification to improve its
quality, and the second one is to increase the volume
of the dataset for effectively training a deep learning
model. To evaluate the quality of the augmented data in
fidelity, variety, and veracity, a data quality
evaluation framework is proposed. We demonstrated the
effectiveness of the data augmentation approach and the
data quality evaluation framework through studying an
automated classification of biology cell images using
deep learning. The experimental results clearly
demonstrated the impact of the volume and quality of
training data to the performance of deep learning and
the importance of the data quality evaluation. The data
augmentation approach and the data quality evaluation
framework can be straightforwardly adapted for deep
learning study in other domains.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "20",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Akhtar:2019:IAV,
author = "Zahaib Akhtar and Anh Minh Le and Yun Seong Nam and
Jessica Chen and Ramesh Govindan and Ethan Katz-Bassett
and Sanjay Rao and Jibin Zhan",
title = "Improving Adaptive Video Streaming through Session
Classification",
journal = j-JDIQ,
volume = "11",
number = "4",
pages = "21:1--21:??",
month = sep,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3309682",
ISSN = "1936-1955",
bibdate = "Tue Oct 22 07:17:01 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3309682",
abstract = "With internet video gaining increasing popularity and
soaring to dominate network traffic, extensive studies
are being carried out on how to achieve higher Quality
of Experience (QoE) with the delivery of video content.
Associated with the chunk-based streaming protocol,
Adaptive Bitrate (ABR) algorithms have recently emerged
to cope with the diverse and fluctuating network
conditions by dynamically adjusting bitrates for future
chunks. This inevitably involves predicting the future
throughput of a video session. Some of the session
features like Internet Service Provider (ISP),
geographical location, and so on, could affect network
conditions and contain helpful information for this
throughput prediction. In this article, we consider how
our knowledge about the session features can be
utilized to improve ABR quality via customized
parameter settings. We present our ABR-independent,
QoE-driven, feature-based partition method to classify
the logged video sessions so that different parameter
settings could be adopted in different situations to
reach better quality. A variation of Decision Tree is
developed for the classification and has been applied
to a sample ABR for evaluation. The experiment shows
that our approach can improve the average bitrate of
the sample ABR by 36.1\% without causing the increase
of the rebuffering ratio where 99\% of the sessions can
get improvement. It can also improve the rebuffering
ratio by 87.7\% without causing the decrease of the
average bitrate, where, among those sessions involved
in rebuffering, 82\% receives improvement and 18\%
remains the same.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "21",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
@Article{Milo:2020:GRD,
author = "Tova Milo",
title = "Getting Rid of Data",
journal = j-JDIQ,
volume = "12",
number = "1",
pages = "1:1--1:7",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3326920",
ISSN = "1936-1955",
bibdate = "Thu Jan 23 07:39:46 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3326920",
abstract = "We are experiencing an amazing data-centered
revolution. Incredible amounts of data are collected,
integrated, and analyzed, leading to key breakthroughs
in science and society. This well of knowledge,
however, is at a great risk if we do not dispense
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "1",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Firmani:2020:EDD,
author = "Donatella Firmani and Letizia Tanca and Riccardo
Torlone",
title = "Ethical Dimensions for Data Quality",
journal = j-JDIQ,
volume = "12",
number = "1",
pages = "2:1--2:5",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3362121",
ISSN = "1936-1955",
bibdate = "Thu Jan 23 07:39:46 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3362121",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Draisbach:2020:TPD,
author = "Uwe Draisbach and Peter Christen and Felix Naumann",
title = "Transforming Pairwise Duplicates to Entity Clusters
for High-quality Duplicate Detection",
journal = j-JDIQ,
volume = "12",
number = "1",
pages = "3:1--3:30",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3352591",
ISSN = "1936-1955",
bibdate = "Thu Jan 23 07:39:46 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3352591",
abstract = "Duplicate detection algorithms produce clusters of
database records, each cluster representing a single
real-world entity. As most of these algorithms use
pairwise comparisons, the resulting (transitive)
clusters can be inconsistent: Not all records
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Shakeel:2020:ASQ,
author = "Yusra Shakeel and Jacob Kr{\"u}ger and Ivonne von
Nostitz-Wallwitz and Gunter Saake and Thomas Leich",
title = "Automated Selection and Quality Assessment of Primary
Studies: a Systematic Literature Review",
journal = j-JDIQ,
volume = "12",
number = "1",
pages = "4:1--4:26",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3356901",
ISSN = "1936-1955",
bibdate = "Thu Jan 23 07:39:46 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3356901",
abstract = "Researchers use systematic literature reviews (SLRs)
to synthesize existing evidence regarding a research
topic. While being an important means to condense
knowledge, conducting an SLR requires a large amount of
time and effort. Consequently, \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Siagian:2020:RWC,
author = "Al Hafiz Akbar Maulana Siagian and Masayoshi
Aritsugi",
title = "Robustness of Word and Character {$N$}-gram
Combinations in Detecting Deceptive and Truthful
Opinions",
journal = j-JDIQ,
volume = "12",
number = "1",
pages = "5:1--5:24",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3349536",
ISSN = "1936-1955",
bibdate = "Thu Jan 23 07:39:46 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3349536",
abstract = "Opinions in reviews about the quality of products or
services can be important information for readers.
Unfortunately, such opinions may include deceptive ones
posted for some business reasons. To keep the opinions
as a valuable and trusted source of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Aswani:2020:EMM,
author = "Reema Aswani and Arpan Kumar Kar and P. Vigneswara
Ilavarasan",
title = "Experience: Managing Misinformation in Social
Media---Insights for Policymakers from {Twitter}
Analytics",
journal = j-JDIQ,
volume = "12",
number = "1",
pages = "6:1--6:18",
month = jan,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3341107",
ISSN = "1936-1955",
bibdate = "Thu Jan 23 07:39:46 MST 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3341107",
abstract = "Governance of misinformation is a serious concern in
social media platforms. Based on experiences gathered
from different case studies, we offer insights for the
policymakers on managing misinformation in social
media. These platforms are widely used \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Rula:2020:ESI,
author = "Anisa Rula and Amrapali Zaveri and Elena Simperl and
Elena Demidova",
title = "Editorial: Special Issue on Quality Assessment of
Knowledge Graphs Dedicated to the Memory of {Amrapali
Zaveri}",
journal = j-JDIQ,
volume = "12",
number = "2",
pages = "7:1--7:4",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3388748",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Tue May 19 09:08:07 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3388748",
abstract = "This editorial summarizes the content of the Special
Issue on Quality Assessment of Knowledge Graphs of the
Journal of Data and Information Quality (JDIQ). We
dedicate this special issue to the memory of our
colleague and friend Amrapali Zaveri.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Ahmadi:2020:MER,
author = "Naser Ahmadi and Viet-Phi Huynh and Vamsi Meduri and
Stefano Ortona and Paolo Papotti",
title = "Mining Expressive Rules in Knowledge Graphs",
journal = j-JDIQ,
volume = "12",
number = "2",
pages = "8:1--8:27",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3371315",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Tue May 19 09:08:07 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3371315",
abstract = "We describe RuDiK, an algorithm and a system for
mining declarative rules over RDF knowledge graphs
(KGs). RuDiK can discover rules expressing both
positive relationships between KG elements, e.g., ``if
two persons share at least one parent, they are
\ldots{}''.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Haller:2020:WLL,
author = "Armin Haller and Javier D. Fern{\'a}ndez and Maulik R.
Kamdar and Axel Polleres",
title = "What Are Links in Linked Open Data? {A}
Characterization and Evaluation of Links between
Knowledge Graphs on the {Web}",
journal = j-JDIQ,
volume = "12",
number = "2",
pages = "9:1--9:34",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3369875",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Tue May 19 09:08:07 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3369875",
abstract = "Linked Open Data promises to provide guiding
principles to publish interlinked knowledge graphs on
the Web in the form of findable, accessible,
interoperable, and reusable datasets. We argue that
while as such, Linked Data may be viewed as a basis for
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Mountantonakis:2020:CBU,
author = "Michalis Mountantonakis and Yannis Tzitzikas",
title = "Content-based Union and Complement Metrics for Dataset
Search over {RDF} Knowledge Graphs",
journal = j-JDIQ,
volume = "12",
number = "2",
pages = "10:1--10:31",
month = apr,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3372750",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Apr 27 07:10:38 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3372750",
abstract = "RDF Knowledge Graphs (or Datasets) contain valuable
information that can be exploited for a variety of
real-world tasks. However, due to the enormous size of
the available RDF datasets, it is difficult to discover
the most valuable datasets for a given \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Bertossi:2020:DQE,
author = "Leopoldo Bertossi and Floris Geerts",
title = "Data Quality and Explainable {AI}",
journal = j-JDIQ,
volume = "12",
number = "2",
pages = "11:1--11:9",
month = may,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3386687",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Tue May 19 09:08:07 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3386687",
abstract = "In this work, we provide some insights and develop
some ideas, with few technical details, about the role
of explanations in Data Quality in the context of
data-based machine learning models (ML). In this
direction, there are, as expected, roles for \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Pitoura:2020:SMM,
author = "Evaggelia Pitoura",
title = "Social-minded Measures of Data Quality: Fairness,
Diversity, and Lack of Bias",
journal = j-JDIQ,
volume = "12",
number = "3",
pages = "12:1--12:8",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3404193",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 30 07:16:42 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3404193",
abstract = "For decades, research in data-driven algorithmic
systems has focused on improving efficiency (making
data access faster and lighter) and effectiveness
(providing relevant results to users). As data-driven
decision making becomes prevalent, there is an
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Colborne:2020:CDR,
author = "Adrienne Colborne and Michael Smit",
title = "Characterizing Disinformation Risk to Open Data in the
Post-Truth Era",
journal = j-JDIQ,
volume = "12",
number = "3",
pages = "13:1--13:13",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3328747",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 30 07:16:42 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3328747",
abstract = "Curated, labeled, high-quality data is a valuable
commodity for tasks such as business analytics and
machine learning. Open data is a common source of such
data---for example, retail analytics draws on open
demographic data, and weather forecast systems
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Blay:2020:IRF,
author = "Karen Banahene Blay and Steven Yeomans and Peter
Demian and Danny Murguia",
title = "The Information Resilience Framework: Vulnerabilities,
Capabilities, and Requirements",
journal = j-JDIQ,
volume = "12",
number = "3",
pages = "14:1--14:25",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3388786",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 30 07:16:42 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3388786",
abstract = "The quality of information is crucial to the success
of asset delivery, management, and performance in the
Digitised Architecture, Engineering, Construction, and
Operations (DAECO) sector. The exposure and sensitivity
of information to threats during \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Koumarelas:2020:DPD,
author = "Ioannis Koumarelas and Lan Jiang and Felix Naumann",
title = "Data Preparation for Duplicate Detection",
journal = j-JDIQ,
volume = "12",
number = "3",
pages = "15:1--15:24",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3377878",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 30 07:16:42 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3377878",
abstract = "Data errors represent a major issue in most
application workflows. Before any important task can
take place, a certain data quality has to be guaranteed
by eliminating a number of different errors that may
appear in data. Typically, most of these errors
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Visengeriyeva:2020:AMD,
author = "Larysa Visengeriyeva and Ziawasch Abedjan",
title = "Anatomy of Metadata for Data Curation",
journal = j-JDIQ,
volume = "12",
number = "3",
pages = "16:1--16:30",
month = jul,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3371925",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 30 07:16:42 MDT 2020",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/3371925",
abstract = "Real-world datasets often suffer from various data
quality problems. Several data cleaning solutions have
been proposed so far. However, data cleaning remains a
manual and iterative task that requires domain and
technical expertise. Exploiting metadata \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Polese:2020:ESI,
author = "Giuseppe Polese and Vincenzo Deufemia and Shaoxu
Song",
title = "Editorial: Special Issue on Metadata Discovery for
Assessing Data Quality",
journal = j-JDIQ,
volume = "12",
number = "4",
pages = "17:1--17:2",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3423321",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3423321",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "17",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Beneventano:2020:BET,
author = "Domenico Beneventano and Sonia Bergamaschi and Luca
Gagliardelli and Giovanni Simonini",
title = "{BLAST2}: an Efficient Technique for Loose Schema
Information Extraction from Heterogeneous Big Data
Sources",
journal = j-JDIQ,
volume = "12",
number = "4",
pages = "18:1--18:22",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3394957",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3394957",
abstract = "We present BLAST2, a novel technique to efficiently
extract loose schema information, i.e., metadata that
can serve as a surrogate of the schema alignment task
within the Entity Resolution (ER) process, to identify
records that refer to the same real-\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "18",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Caruccio:2020:IDI,
author = "Loredana Caruccio and Stefano Cirillo",
title = "Incremental Discovery of Imprecise Functional
Dependencies",
journal = j-JDIQ,
volume = "12",
number = "4",
pages = "19:1--19:25",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3397462",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3397462",
abstract = "Functional dependencies (fds) are one of the metadata
used to assess data quality and to perform data
cleaning operations. However, to pursue robustness with
respect to data errors, it has been necessary to devise
imprecise versions of functional \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "19",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Maiolo:2020:DPP,
author = "Sof{\'\i}a Maiolo and Lorena Etcheverry and Adriana
Marotta",
title = "Data Profiling in Property Graph Databases",
journal = j-JDIQ,
volume = "12",
number = "4",
pages = "20:1--20:27",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3409473",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3409473",
abstract = "Property Graph databases are being increasingly used
within the industry as a powerful and flexible way to
model real-world scenarios. With this flexibility, a
great challenge appears regarding profiling tasks due
to the need of adapting them to these \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "20",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Ahmadi:2020:RPC,
author = "Naser Ahmadi and Thi-Thuy-Duyen Truong and Le-Hong-Mai
Dao and Stefano Ortona and Paolo Papotti",
title = "{RuleHub}: a Public Corpus of Rules for Knowledge
Graphs",
journal = j-JDIQ,
volume = "12",
number = "4",
pages = "21:1--21:22",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3409384",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3409384",
abstract = "Entity-centric knowledge graphs (KGs) are now popular
to collect facts about entities. KGs have rich schemas
with a large number of different types and predicates
to describe the entities and their relationships. On
these rich schemas, logical rules are \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "21",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Lammel:2020:MHQ,
author = "Philipp L{\"a}mmel and Benjamin Dittwald and Lina
Bruns and Nikolay Tcholtchev and Yuri Glikman and Silke
Cuno and Mathias Fl{\"u}gge and Ina Schieferdecker",
title = "Metadata Harvesting and Quality Assurance within Open
Urban Platforms",
journal = j-JDIQ,
volume = "12",
number = "4",
pages = "22:1--22:20",
month = nov,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3409795",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3409795",
abstract = "During the past years, various activities and concepts
have shaped and prepared the path for the development
of urban environments toward smart cities across the
world. One of the initial activities was relating to
the opening of vast amounts of data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "22",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Li:2021:DEM,
author = "Yuliang Li and Jinfeng Li and Yoshihiko Suhara and Jin
Wang and Wataru Hirota and Wang-Chiew Tan",
title = "Deep Entity Matching: Challenges and Opportunities",
journal = j-JDIQ,
volume = "13",
number = "1",
pages = "1:1--1:17",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3431816",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Feb 10 10:35:23 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3431816",
abstract = "Entity matching refers to the task of determining
whether two different representations refer to the same
real-world entity. It continues to be a prevalent
problem for many organizations where data resides in
different sources and duplicates the need to \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "1",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Loster:2021:KTE,
author = "Michael Loster and Ioannis Koumarelas and Felix
Naumann",
title = "Knowledge Transfer for Entity Resolution with
{Siamese} Neural Networks",
journal = j-JDIQ,
volume = "13",
number = "1",
pages = "2:1--2:25",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3410157",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Feb 10 10:35:23 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3410157",
abstract = "The integration of multiple data sources is a common
problem in a large variety of applications.
Traditionally, handcrafted similarity measures are used
to discover, merge, and integrate multiple
representations of the same entity---duplicates---into a
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Neto:2021:DGD,
author = "Nelson Novaes Neto and Stuart Madnick and Anchises
Moraes G. {De Paula} and Natasha Malara Borges",
title = "Developing a Global Data Breach Database and the
Challenges Encountered",
journal = j-JDIQ,
volume = "13",
number = "1",
pages = "3:1--3:33",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3439873",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Feb 10 10:35:23 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3439873",
abstract = "If the mantra ``data is the new oil'' of our digital
economy is correct, then data leak incidents are the
critical disasters in the online society. The initial
goal of our research was to present a comprehensive
database of data breaches of personal \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Fazzolari:2021:EIO,
author = "Michela Fazzolari and Francesco Buccafurri and
Gianluca Lax and Marinella Petrocchi",
title = "Experience: Improving Opinion Spam Detection by
Cumulative Relative Frequency Distribution",
journal = j-JDIQ,
volume = "13",
number = "1",
pages = "4:1--4:16",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3439307",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Feb 10 10:35:23 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3439307",
abstract = "Over the past few years, online reviews have become
very important, since they can influence the purchase
decision of consumers and the reputation of businesses.
Therefore, the practice of writing fake reviews can
have severe consequences on customers \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Costa:2021:EQA,
author = "Rog{\'e}rio Lu{\'\i}s C. Costa and Enrico Miranda and
Paulo Dias and Jos{\'e} Moreira",
title = "Experience: Quality Assessment and Improvement on a
Forest Fire Dataset",
journal = j-JDIQ,
volume = "13",
number = "1",
pages = "5:1--5:13",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3428155",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Feb 10 10:35:23 MST 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3428155",
abstract = "Spatio-temporal data can be used to study and simulate
the movement and behavior of objects and natural
phenomena. However, the use of real-world data raises
several challenges related to its acquisition,
representation, and quality. This article \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Aljawarneh:2021:ESIa,
author = "Shadi Aljawarneh and Juan A. Lara",
title = "Editorial: Special Issue on Quality Assessment and
Management in Big Data --- {Part I}",
journal = j-JDIQ,
volume = "13",
number = "2",
pages = "6:1--6:3",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3449052",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 1 08:31:27 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3449052",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Cummings:2021:SCM,
author = "Mary L. Cummings and Songpo Li",
title = "Subjectivity in the Creation of Machine Learning
Models",
journal = j-JDIQ,
volume = "13",
number = "2",
pages = "7:1--7:19",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418034",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 1 08:31:27 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3418034",
abstract = "Transportation analysts are inundated with requests to
apply popular machine learning modeling techniques to
datasets to uncover never-before-seen relationships
that could potentially revolutionize safety,
congestion, and mobility. However, the results
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "7",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Shah:2021:GBD,
author = "Syed Iftikhar Hussain Shah and Vassilios Peristeras
and Ioannis Magnisalis",
title = "Government Big Data Ecosystem: Definitions, Types of
Data, Actors, and Roles and the Impact in Public
Administrations",
journal = j-JDIQ,
volume = "13",
number = "2",
pages = "8:1--8:25",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3425709",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 1 08:31:27 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3425709",
abstract = "The public sector, private firms, business community,
and civil society are generating data that are high in
volume, veracity, and velocity and come from a
diversity of sources. This type of data is today known
as big data. Public administrations pursue \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "8",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Batayneh:2021:SSI,
author = "Abeer A. {Al Batayneh} and Malik Qasaimeh and Raad S.
Al-Qassas",
title = "A Scoring System for Information Security Governance
Framework Using Deep Learning Algorithms: a Case Study
on the Banking Sector",
journal = j-JDIQ,
volume = "13",
number = "2",
pages = "9:1--9:34",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418172",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 1 08:31:27 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3418172",
abstract = "Cybercrime reports showed an increase in the number of
attacks targeting financial institutions. Indeed, banks
were the target of 30\% of the total number of
cyber-attacks. One of the recommended methods for
driving the security challenges is to implement
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "9",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Fraihat:2021:BIF,
author = "Salam Fraihat and Walid A. Salameh and Ammar Elhassan
and Bushra Abu Tahoun and Maisa Asasfeh",
title = "Business Intelligence Framework Design and
Implementation: a Real-estate Market Case Study",
journal = j-JDIQ,
volume = "13",
number = "2",
pages = "10:1--10:16",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3422669",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 1 08:31:27 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3422669",
abstract = "This article builds on previous work in the area of
real-world applications of Business Intelligence (BI)
technology. It illustrates the analysis, modeling, and
framework design of a BI solution with high data
quality to provide reliable analytics and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Khalemsky:2021:EDV,
author = "A. Khalemsky and R. Gelbard",
title = "{ExpanDrogram}: Dynamic Visualization of Big Data
Segmentation over Time",
journal = j-JDIQ,
volume = "13",
number = "2",
pages = "11:1--11:27",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3434778",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 1 08:31:27 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3434778",
abstract = "In dynamic and big data environments the visualization
of a segmentation process over time often does not
enable the user to simultaneously track entire pieces.
The key points are sometimes incomparable, and the user
is limited to a static visual \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Radhakrishna:2021:CPV,
author = "Vangipuram Radhakrishna and Gali Suresh Reddy and
Puligadda Veereswara Kumar and Vinjamuri Janaki",
title = "Challenge Paper: The Vision for Time Profiled Temporal
Association Mining",
journal = j-JDIQ,
volume = "13",
number = "2",
pages = "12:1--12:8",
month = jun,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3404198",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Jul 1 08:31:27 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3404198",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Aljawarneh:2021:ESIb,
author = "Shadi Aljawarneh and Juan A. Lara",
title = "Editorial: Special Issue on Quality Assessment and
Management in Big Data --- {Part II}",
journal = j-JDIQ,
volume = "13",
number = "3",
pages = "13:1--13:3",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3449056",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Aug 2 15:58:12 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3449056",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{J:2021:HIM,
author = "Sreelakshmy I. J. and Binsu C. Kovoor",
title = "A Hybrid Inpainting Model Combining Diffusion and
Enhanced Exemplar Methods",
journal = j-JDIQ,
volume = "13",
number = "3",
pages = "14:1--14:19",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418035",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Aug 2 15:58:12 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3418035",
abstract = "Image inpainting is a technique in the world of image
editing where missing portions of the image are
estimated and filled with the help of available or
external information. In the proposed model, a novel
hybrid inpainting algorithm is implemented, which
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Chirkova:2021:EDR,
author = "Rada Chirkova and Jon Doyle and Juan Reutter",
title = "Ensuring Data Readiness for Quality Requirements with
Help from Procedure Reuse",
journal = j-JDIQ,
volume = "13",
number = "3",
pages = "15:1--15:15",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3428154",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Aug 2 15:58:12 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3428154",
abstract = "Assessing and improving the quality of data are
fundamental challenges in Big-Data applications. These
challenges have given rise to numerous solutions
targeting transformation, integration, and cleaning of
data. However, while schema design, data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Joy:2021:OBL,
author = "Jeevamol Joy and Nisha S. Raj and Renumol V. G.",
title = "Ontology-based E-learning Content Recommender System
for Addressing the Pure Cold-start Problem",
journal = j-JDIQ,
volume = "13",
number = "3",
pages = "16:1--16:27",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3429251",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Aug 2 15:58:12 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3429251",
abstract = "E-learning recommender systems are gaining
significance nowadays due to its ability to enhance the
learning experience by providing tailor-made services
based on learner preferences. A Personalized Learning
Environment (PLE) that automatically adapts to
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Roy:2021:UNA,
author = "Anurag Roy and Shalmoli Ghosh and Kripabandhu Ghosh
and Saptarshi Ghosh",
title = "An Unsupervised Normalization Algorithm for Noisy
Text: a Case Study for Information Retrieval and Stance
Detection",
journal = j-JDIQ,
volume = "13",
number = "3",
pages = "17:1--17:25",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3418036",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Aug 2 15:58:12 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3418036",
abstract = "A large fraction of textual data available today
contains various types of ``noise,'' such as OCR noise
in digitized documents, noise due to informal writing
style of users on microblogging sites, and so on. To
enable tasks such as search/retrieval and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "17",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Liu:2021:EAC,
author = "Zhicheng Liu and Yang Zhang and Ruihong Huang and
Zhiwei Chen and Shaoxu Song and Jianmin Wang",
title = "{EXPERIENCE}: Algorithms and Case Study for Explaining
Repairs with Uniform Profiles over {IoT} Data",
journal = j-JDIQ,
volume = "13",
number = "3",
pages = "18:1--18:17",
month = jul,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3436239",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Aug 2 15:58:12 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3436239",
abstract = "IoT data with timestamps are often found with
outliers, such as GPS trajectories or sensor readings.
While existing systems mostly focus on detecting
temporal outliers without explanations and repairs, a
decision maker may be more interested in the cause
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "18",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Kubiczek:2021:CRC,
author = "Jakub Kubiczek and Bart{\l}omiej Hadasik",
title = "Challenges in Reporting the {COVID-19} Spread and its
Presentation to the Society",
journal = j-JDIQ,
volume = "13",
number = "4",
pages = "19:1--19:7",
month = dec,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3470851",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3470851",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "19",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Tufis:2021:TCD,
author = "Mihnea Tufis and Ludovico Boratto",
title = "Toward a Complete Data Valuation Process. Challenges
of Personal Data",
journal = j-JDIQ,
volume = "13",
number = "4",
pages = "20:1--20:7",
month = dec,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447269",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3447269",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "20",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Nayak:2021:EAP,
author = "Stuti Nayak and Amrapali Zaveri and Pedro Hernandez
Serrano and Michel Dumontier",
title = "Experience: Automated Prediction of Experimental
Metadata from Scientific Publications",
journal = j-JDIQ,
volume = "13",
number = "4",
pages = "21:1--21:11",
month = dec,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3451219",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3451219",
abstract = "While there exists an abundance of open biomedical
data, the lack of high-quality metadata makes it
challenging for others to find relevant datasets and to
reuse them for another purpose. In particular, metadata
are useful to understand the nature and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "21",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Chen:2021:BBA,
author = "Jessica Chen and Henry Milner and Ion Stoica and Jibin
Zhan",
title = "Benchmark of Bitrate Adaptation in Video Streaming",
journal = j-JDIQ,
volume = "13",
number = "4",
pages = "22:1--22:24",
month = dec,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3468063",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3468063",
abstract = "The HTTP adaptive streaming technique opened the door
to cope with the fluctuating network conditions during
the streaming process by dynamically adjusting the
volume of the future chunks to be downloaded. The
bitrate selection in this adjustment \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "22",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Amaral:2021:AQS,
author = "Gabriel Amaral and Alessandro Piscopo and
Lucie-Aim{\'e}e Kaffee and Odinaldo Rodrigues and Elena
Simperl",
title = "Assessing the Quality of Sources in {Wikidata} Across
Languages: a Hybrid Approach",
journal = j-JDIQ,
volume = "13",
number = "4",
pages = "23:1--23:35",
month = dec,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3484828",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Wed Nov 3 09:43:30 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3484828",
abstract = "Wikidata is one of the most important sources of
structured data on the web, built by a worldwide
community of volunteers. As a secondary source, its
contents must be backed by credible references; this is
particularly important, as Wikidata explicitly
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "23",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Barhamgi:2022:ESIa,
author = "Mahmoud Barhamgi and Elisa Bertino",
title = "Editorial: Special Issue on Data Transparency ---
Data Quality, Annotation, and Provenance",
journal = j-JDIQ,
volume = "14",
number = "1",
pages = "1:1--1:3",
month = mar,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494454",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Feb 3 06:14:38 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3494454",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "1",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Thirumuruganathan:2022:AAA,
author = "Saravanan Thirumuruganathan and Mayuresh Kunjir and
Mourad Ouzzani and Sanjay Chawla",
title = "Automated Annotations for {AI} Data and Model
Transparency",
journal = j-JDIQ,
volume = "14",
number = "1",
pages = "2:1--2:9",
month = mar,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3460000",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Feb 3 06:14:38 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3460000",
abstract = "The data and Artificial Intelligence revolution has
had a massive impact on enterprises, governments, and
society alike. It is fueled by two key factors. First,
data have become increasingly abundant and are often
available openly. Enterprises have more \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "2",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Geisler:2022:KDD,
author = "Sandra Geisler and Maria-Esther Vidal and Cinzia
Cappiello and Bernadette Farias L{\'o}scio and Avigdor
Gal and Matthias Jarke and Maurizio Lenzerini and Paolo
Missier and Boris Otto and Elda Paja and Barbara
Pernici and Jakob Rehof",
title = "Knowledge-Driven Data Ecosystems Toward Data
Transparency",
journal = j-JDIQ,
volume = "14",
number = "1",
pages = "3:1--3:12",
month = mar,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3467022",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Feb 3 06:14:38 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3467022",
abstract = "A data ecosystem (DE) offers a keystone-player or
alliance-driven infrastructure that enables the
interaction of different stakeholders and the
resolution of interoperability issues among shared
data. However, despite years of research in data
governance \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "3",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Belhajjame:2022:AWP,
author = "Khalid Belhajjame",
title = "On the Anonymization of Workflow Provenance without
Compromising the Transparency of Lineage",
journal = j-JDIQ,
volume = "14",
number = "1",
pages = "4:1--4:27",
month = mar,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3460207",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Feb 3 06:14:38 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3460207",
abstract = "Workflows have been adopted in several scientific
fields as a tool for the specification and execution of
scientific experiments. In addition to automating the
execution of experiments, workflow systems often
include capabilities to record provenance \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "4",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Dargahi:2022:IBC,
author = "Tooska Dargahi and Hossein Ahmadvand and Mansour Naser
Alraja and Chia-Mu Yu",
title = "Integration of Blockchain with Connected and
Autonomous Vehicles: Vision and Challenge",
journal = j-JDIQ,
volume = "14",
number = "1",
pages = "5:1--5:10",
month = mar,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3460003",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Feb 3 06:14:38 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3460003",
abstract = "Connected and Autonomous Vehicles (CAVs) are
introduced to improve individuals' quality of life by
offering a wide range of services. They collect a huge
amount of data and exchange them with each other and
the infrastructure. The collected data usually includes
sensitive information about the users and the
surrounding environment. Therefore, data security and
privacy are among the main challenges in this industry.
Blockchain, an emerging distributed ledger, has been
considered by the research community as a potential
solution for enhancing data security, integrity, and
transparency in Intelligent Transportation Systems
(ITS). However, despite the emphasis of governments on
the transparency of personal data protection practices,
CAV stakeholders have not been successful in
communicating appropriate information with the end
users regarding the procedure of collecting, storing,
and processing their personal data, as well as the data
ownership. This article provides a vision of the
opportunities and challenges of adopting blockchain in
ITS from the ``data transparency'' and ``privacy''
perspective. The main aim is to answer the following
questions: (1) Considering the amount of personal data
collected by the CAVs, such as location, how would the
integration of blockchain technology affect
transparency, fairness, and lawfulness of personal data
processing concerning the data subjects (as this is one
of the main principles in the existing data protection
regulations)? (2) How can the trade-off between
transparency and privacy be addressed in
blockchain-based ITS use cases?",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "5",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Barhamgi:2022:ESIb,
author = "Mahmoud Barhamgi and Elisa Bertino",
title = "Editorial: Special Issue on Data Transparency --- Uses
Cases and Applications",
journal = j-JDIQ,
volume = "14",
number = "2",
pages = "6:1--6:3",
month = jun,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3494455",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Apr 23 13:23:12 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3494455",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "6",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Badr:2022:DTF,
  author          = {Youakim Badr and Rahul Sharma},
  title           = {Data Transparency and Fairness Analysis of the {NYPD
                     Stop-and-Frisk Program}},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {2},
  pages           = {7:1--7:14},
  month           = jun,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3460533},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Sat Apr 23 13:23:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3460533},
  abstract        = {Given the increased concern of racial disparities in
                     the stop-and-frisk programs, the New York Police
                     Department (NYPD) requires publicly displaying detailed
                     data for all the stops conducted by police authorities,
                     including the suspected offense and race \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {7},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Chen:2022:ATR,
  author          = {Chien-Lun Chen and Leana Golubchik and Ranjan Pal},
  title           = {Achieving Transparency Report Privacy in Linear Time},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {2},
  pages           = {8:1--8:56},
  month           = jun,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3460001},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Sat Apr 23 13:23:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3460001},
  abstract        = {An accountable algorithmic transparency report (ATR)
                     should ideally investigate (a) transparency of the
                     underlying algorithm, and (b) fairness of the
                     algorithmic decisions, and at the same time preserve
                     data subjects' privacy. However, a provably formal
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {8},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Mauri:2022:EDM,
  author          = {Lara Mauri and Ernesto Damiani},
  title           = {Estimating Degradation of Machine Learning Data
                     Assets},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {2},
  pages           = {9:1--9:15},
  month           = jun,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3446331},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Sat Apr 23 13:23:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3446331},
  abstract        = {Large-scale adoption of Artificial Intelligence and
                     Machine Learning (AI-ML) models fed by heterogeneous,
                     possibly untrustworthy data sources has spurred
                     interest in estimating degradation of such models due
                     to spurious, adversarial, or low-quality data
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {9},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Wang:2022:TAL,
  author          = {Bin Wang and Pengfei Guo and Xing Wang and Yongzhong
                     He and Wei Wang},
  title           = {Transparent Aspect-Level Sentiment Analysis Based on
                     Dependency Syntax Analysis and Its Application on
                     {COVID-19}},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {2},
  pages           = {10:1--10:24},
  month           = jun,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3460002},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Sat Apr 23 13:23:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3460002},
  abstract        = {Aspect-level sentiment analysis identifies
                     fine-grained emotion for target words. There are three
                     major issues in current models of aspect-level
                     sentiment analysis. First, few models consider the
                     natural language semantic characteristics of the texts.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {10},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Hsu:2022:EAM,
  author          = {Che-Yun Hsu and Ting-Rui Chen and Hung-Hsuan Chen},
  title           = {Experience: Analyzing Missing {Web} Page Visits and
                     Unintentional {Web} Page Visits from the Client-side
                     {Web} Logs},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {2},
  pages           = {11:1--11:17},
  month           = jun,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3490392},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Sat Apr 23 13:23:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3490392},
  abstract        = {Web logs have been widely used to represent the web
                     page visits of online users. However, we found that web
                     logs in Chrome's browsing history only record 57\% of
                     users' visited websites, i.e., nearly half of a user's
                     website visits are not recorded. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {11},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Patnaik:2022:WIE,
  author          = {Sudhir Kumar Patnaik and C. Narendra Babu},
  title           = {A {Web} Information Extraction Framework with Adaptive
                     and Failure Prediction Feature},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {2},
  pages           = {12:1--12:21},
  month           = jun,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3495008},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Sat Apr 23 13:23:12 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3495008},
  abstract        = {The amount of information available on the internet
                     today requires effective information extraction and
                     processing to offer hyper-personalized user
                     experiences. Inability to extract information by using
                     traditional and machine learning techniques due to
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {12},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Ilyas:2022:MLD,
  author          = {Ihab F. Ilyas and Theodoros Rekatsinas},
  title           = {Machine Learning and Data Cleaning: Which Serves the
                     Other?},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {13:1--13:11},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3506712},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3506712},
  abstract        = {The last few years witnessed significant advances in
                     building automated or semi-automated data quality, data
                     cleaning and data integration systems powered by
                     machine learning (ML). In parallel, large deployment of
                     ML systems in business, science, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {13},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Santoro:2022:ESI,
  author          = {Donatello Santoro and Saravanan Thirumuruganathan and
                     Paolo Papotti},
  title           = {Editorial: Special Issue on Deep Learning for Data
                     Quality},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {14:1--14:3},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3513135},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3513135},
  abstract        = {This editorial summarizes the content of the Special
                     Issue on Deep Learning for Data Quality of the Journal
                     of Data and Information Quality (JDIQ).},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {14},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Wu:2022:CTL,
  author          = {Renzhi Wu and Nilaksh Das and Sanya Chaba and Sakshi
                     Gandhi and Duen Horng Chau and Xu Chu},
  title           = {A Cluster-then-label Approach for Few-shot Learning
                     with Application to Automatic Image Data Labeling},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {15:1--15:23},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3491232},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3491232},
  abstract        = {Few-shot learning (FSL) aims at learning to generalize
                     from only a small number of labeled examples for a
                     given target task. Most current state-of-the-art FSL
                     methods typically have two limitations. First, they
                     usually require access to a source dataset \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {15},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Shraga:2022:PQA,
  author          = {Roee Shraga and Avigdor Gal},
  title           = {{PoWareMatch}: a Quality-aware Deep Learning Approach
                     to Improve Human Schema Matching},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {16:1--16:27},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3483423},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3483423},
  abstract        = {Schema matching is a core task of any data integration
                     process. Being investigated in the fields of databases,
                     AI, Semantic Web, and data mining for many years, the
                     main challenge remains the ability to generate quality
                     matches among data concepts (e.g., \ldots{}).},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {16},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Haque:2022:NIC,
  author          = {Md Enamul Haque and Mehmet Engin Tozal},
  title           = {Negative Insurance Claim Generation Using Distance
                     Pooling on Positive Diagnosis-Procedure Bipartite
                     Graphs},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {17:1--17:26},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3531347},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3531347},
  abstract        = {Negative samples in health and medical insurance
                     domain refer to fraudulent or erroneous insurance
                     claims that may include inconsistent
                     diagnosis-procedure relations with respect to a medical
                     coding system. Unfortunately, only a few datasets are
                     publicly \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {17},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Demetrescu:2022:WCC,
  author          = {Camil Demetrescu and Irene Finocchi and Andrea
                     Ribichini and Marco Schaerf},
  title           = {Which Conference Is That? {A} Case Study in Computer
                     Science},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {18:1--18:13},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3519031},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3519031},
  abstract        = {Conferences play a major role in some disciplines such
                     as computer science and are often used in research
                     quality evaluation exercises. Differently from journals
                     and books, for which ISSN and ISBN codes provide
                     unambiguous keys, recognizing the conference \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {18},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Gram:2022:DIH,
  author          = {Dennis Gram and Pantelis Karapanagiotis and Marius
                     Liebald and Uwe Walz},
  title           = {Design and Implementation of a Historical {German}
                     Firm-level Financial Database},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {19:1--19:22},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3531533},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3531533},
  abstract        = {Broad, long-term financial, and economic datasets are
                     scarce resources, particularly in the European context.
                     In this article, we present an approach for an
                     extensible data model that is adaptable to future
                     changes in technologies and sources. This model
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {19},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Zheng:2022:CDC,
  author          = {Zheng Zheng and Longtao Zheng and Morteza
                     Alipourlangouri and Fei Chiang and Lukasz Golab and
                     Jaroslaw Szlichta and Sridevi Baskaran},
  title           = {Contextual Data Cleaning with Ontology Functional
                     Dependencies},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {20:1--20:26},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3524303},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3524303},
  abstract        = {Functional Dependencies define attribute relationships
                     based on syntactic equality, and when used in data
                     cleaning, they erroneously label syntactically
                     different but semantically equivalent values as errors.
                     We explore dependency-based data cleaning with
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {20},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Hacker:2022:ACC,
  author          = {Philipp Hacker and Felix Naumann and Tobias Friedrich
                     and Stefan Grundmann and Anja Lehmann and Herbert
                     Zech},
  title           = {{AI} Compliance --- Challenges of Bridging Data
                     Science and Law},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {3},
  pages           = {21:1--21:4},
  month           = sep,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3531532},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Wed Aug 10 06:32:51 MDT 2022},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3531532},
  abstract        = {This vision article outlines the main building blocks
                     of what we term AI Compliance, an effort to bridge two
                     complementary research areas: computer science and the
                     law. Such research has the goal to model, measure, and
                     affect the quality of AI artifacts, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {21},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Li:2022:DCC,
  author          = {Yuanxia Li and Faiz Currim and Sudha Ram},
  title           = {Data Completeness and Complex Semantics in Conceptual
                     Modeling: The Need for a Disaggregation Construct},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {4},
  pages           = {22:1--22:??},
  month           = dec,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3532784},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:10 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3532784},
  abstract        = {Conceptual modeling is important for developing
                     databases that maintain the integrity and quality of
                     stored information. However, classical conceptual
                     models have often been assumed to work on
                     well-maintained and high-quality data. With the
                     advancement \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {22},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Johnson:2022:SCB,
  author          = {Justin M. Johnson and Taghi M. Khoshgoftaar},
  title           = {A Survey on Classifying Big Data with Label Noise},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {4},
  pages           = {23:1--23:??},
  month           = dec,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3492546},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:10 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3492546},
  abstract        = {Class label noise is a critical component of data
                     quality that directly inhibits the predictive
                     performance of machine learning algorithms. While many
                     data-level and algorithm-level methods exist for
                     treating label noise, the challenges associated with
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {23},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Firmani:2022:ESI,
  author          = {Donatella Firmani and Letizia Tanca and Riccardo
                     Torlone},
  title           = {Editorial: Special Issue on Data Quality and Ethics},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {4},
  pages           = {24:1--24:??},
  month           = dec,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3561202},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:10 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3561202},
  abstract        = {This editorial summarizes the content of the Special
                     Issue on Data Quality and Ethics of the Journal of Data
                     and Information Quality (JDIQ). The issue accepted
                     submissions from June 1 to July 30, 2021.},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {24},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Mecati:2022:DRB,
  author          = {Mariachiara Mecati and Antonio Vetr{\`o} and Marco
                     Torchiano},
  title           = {Detecting Risk of Biased Output with Balance
                     Measures},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {4},
  pages           = {25:1--25:??},
  month           = dec,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3530787},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:10 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3530787},
  abstract        = {Data have become a fundamental element of the
                     management and productive infrastructures of our
                     society, fuelling digitization of organizational and
                     decision-making processes at an impressive speed. This
                     transition shows lights and shadows, and the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {25},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Accinelli:2022:CBA,
  author          = {Chiara Accinelli and Barbara Catania and Giovanna
                     Guerrini and Simone Minisi},
  title           = {A Coverage-based Approach to Nondiscrimination-aware
                     Data Transformation},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {4},
  pages           = {26:1--26:??},
  month           = dec,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3546913},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:10 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3546913},
  abstract        = {The development of technological solutions satisfying
                     nondiscriminatory requirements is one of the main
                     current challenges for data processing. Back-end
                     operators for preparing, i.e., extracting and
                     transforming, data play a relevant role w.r.t.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {26},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Jagadish:2022:MFD,
author = "H. V. Jagadish and Julia Stoyanovich and Bill Howe",
title = "The Many Facets of Data Equity",
journal = j-JDIQ,
volume = "14",
number = "4",
pages = "27:1--27:??",
month = dec,
year = "2022",
CODEN = "????",
DOI = "https://doi.org/10.1145/3533425",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Thu Mar 9 08:17:10 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3533425",
abstract = "Data-driven systems can induce, operationalize, and
amplify systemic discrimination in a variety of ways.
As data scientists, we tend to prefer to isolate and
formalize equity problems to make them amenable to
narrow technical solutions. However, this \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "27",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Mazilu:2022:FAD,
  author          = {Lacramioara Mazilu and Norman W. Paton and Nikolaos
                     Konstantinou and Alvaro A. A. Fernandes},
  title           = {Fairness-aware Data Integration},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {4},
  pages           = {28:1--28:??},
  month           = dec,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3519419},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:10 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3519419},
  abstract        = {Machine learning can be applied in applications that
                     take decisions that impact people's lives. Such
                     techniques have the potential to make decision making
                     more objective, but there also is a risk that the
                     decisions can discriminate against certain groups
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {28},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Azzalini:2022:FDF,
  author          = {Fabio Azzalini and Chiara Criscuolo and Letizia
                     Tanca},
  title           = {{E-FAIR-DB}: Functional Dependencies to Discover Data
                     Bias and Enhance Data Equity},
  journal         = j-JDIQ,
  volume          = {14},
  number          = {4},
  pages           = {29:1--29:??},
  month           = dec,
  year            = {2022},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3552433},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:10 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3552433},
  abstract        = {Decisions based on algorithms and systems generated
                     from data have become essential tools that pervade all
                     aspects of our daily lives; for these advances to be
                     reliable, the results should be accurate but should
                     also respect all the facets of data equity \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {29},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Wright:2023:ISI,
  author          = {Dustin Wright and Paolo Papotti and Isabelle
                     Augenstein},
  title           = {Introduction to the Special Issue on Truth and Trust
                     Online},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {1:1--1:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3578242},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3578242},
  abstract        = {This editorial summarizes the content of the Special
                     Issue on Truth and Trust Online of the Journal of Data
                     and Information Quality. We thank the authors for their
                     exceptional contributions to this special issue.},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {1},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Gausen:2023:UAB,
  author          = {Anna Gausen and Wayne Luk and Ce Guo},
  title           = {Using Agent-Based Modelling to Evaluate the Impact of
                     Algorithmic Curation on Social Media},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {2:1--2:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3546915},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3546915},
  abstract        = {Social media networks have drastically changed how
                     people communicate and seek information. Due to the
                     scale of information on these platforms, newsfeed
                     curation algorithms have been developed to sort through
                     this information and curate what users see. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {2},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Stammbach:2023:CTK,
  author          = {Dominik Stammbach and Boya Zhang and Elliott Ash},
  title           = {The Choice of Textual Knowledge Base in Automated
                     Claim Checking},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {3:1--3:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3561389},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3561389},
  abstract        = {Automated claim checking is the task of determining
                     the veracity of a claim given evidence retrieved from a
                     textual knowledge base of trustworthy facts. While
                     previous work has taken the knowledge base as given and
                     optimized the claim-checking pipeline, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {3},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Brand:2023:NMJ,
  author          = {Erik Brand and Kevin Roitero and Michael Soprano and
                     Afshin Rahimi and Gianluca Demartini},
  title           = {A Neural Model to Jointly Predict and Explain
                     Truthfulness of Statements},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {4:1--4:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3546917},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3546917},
  abstract        = {Automated fact-checking (AFC) systems exist to combat
                     disinformation, however, their complexity usually makes
                     them opaque to the end-user, making it difficult to
                     foster trust in the system. In this article, we
                     introduce the E-BART model with the hope of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {4},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Qu:2023:CHM,
  author          = {Yunke Qu and Kevin Roitero and David {La Barbera} and
                     Damiano Spina and Stefano Mizzaro and Gianluca
                     Demartini},
  title           = {Combining Human and Machine Confidence in Truthfulness
                     Assessment},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {5:1--5:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3546916},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3546916},
  abstract        = {Automatically detecting online misinformation at scale
                     is a challenging and interdisciplinary problem.
                     Deciding what is to be considered truthful information
                     is sometimes controversial and also difficult for
                     educated experts. As the scale of the problem
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {5},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Anuchitanukul:2023:RCT,
  author          = {Atijit Anuchitanukul and Julia Ive and Lucia Specia},
  title           = {Revisiting Contextual Toxicity Detection in
                     Conversations},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {6:1--6:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3561390},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3561390},
  abstract        = {Understanding toxicity in user conversations is
                     undoubtedly an important problem. Addressing ``covert''
                     or implicit cases of toxicity is particularly hard and
                     requires context. Very few previous studies have
                     analysed the influence of conversational context
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {6},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Panda:2023:DDW,
  author          = {Subhadarshi Panda and Sarah Levitan},
  title           = {Deception Detection Within and Across Domains:
                     Identifying and Understanding the Performance Gap},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {7:1--7:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3561413},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3561413},
  abstract        = {NLP approaches to automatic deception detection have
                     gained popularity over the past few years, especially
                     with the proliferation of fake reviews and fake news
                     online. However, most previous studies of deception
                     detection have focused on single domains. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {7},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Senaratne:2023:UIA,
  author          = {Asara Senaratne and Peter Christen and Graham Williams
                     and Pouya G. Omran},
  title           = {Unsupervised Identification of Abnormal Nodes and
                     Edges in Graphs},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {8:1--8:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3546912},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3546912},
  abstract        = {Much of today's data are represented as graphs,
                     ranging from social networks to bibliographic
                     citations. Nodes in such graphs correspond to records
                     that generally represent entities, while edges
                     represent relationships between these entities. Both
                     nodes \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {8},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Zuo:2023:SSP,
  author          = {Chaoyuan Zuo and Ritwik Banerjee and Fateme Hashemi
                     Chaleshtori and Hossein Shirazi and Indrakshi Ray},
  title           = {Seeing Should Probably Not Be Believing: The Role of
                     Deceptive Support in {COVID-19} Misinformation on
                     {Twitter}},
  journal         = j-JDIQ,
  volume          = {15},
  number          = {1},
  pages           = {9:1--9:??},
  month           = mar,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3546914},
  ISSN            = {1936-1955},
  ISSN-L          = {1936-1955},
  bibdate         = {Thu Mar 9 08:17:11 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/jdiq.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3546914},
  abstract        = {With the spread of the SARS-CoV-2, enormous amounts of
                     information about the pandemic are disseminated through
                     social media platforms such as Twitter. Social media
                     posts often leverage the trust readers have in
                     prestigious news agencies and cite news \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {J. Data Inf. Qual.},
  articleno       = {9},
  fjournal        = {Journal of Data and Information Quality (JDIQ)},
  journal-URL     = {https://dl.acm.org/loi/jdiq},
}
@Article{Navigli:2023:BLL,
author = "Roberto Navigli and Simone Conia and Bj{\"o}rn Ross",
title = "Biases in Large Language Models: Origins, Inventory,
and Discussion",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "10:1--10:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597307",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3597307",
abstract = "In this article, we introduce and discuss the
pervasive issue of bias in the large language models
that are currently at the core of mainstream approaches
to Natural Language Processing (NLP). We first
introduce data selection bias, that is, the bias
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "10",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Priestley:2023:SDQ,
author = "Maria Priestley and Fionnt{\'a}n O'Donnell and Elena
Simperl",
title = "A Survey of Data Quality Requirements That Matter in
{ML} Development Pipelines",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "11:1--11:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592616",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3592616",
abstract = "The fitness of the systems in which Machine Learning
(ML) is used depends greatly on good-quality data.
Specifications on what makes a good-quality dataset
have traditionally been defined by the needs of the
data users---typically analysts and engineers. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "11",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Simon:2023:CCA,
author = "Eric Simon and Bernd Amann and Rutian Liu and
St{\'e}phane Gan{\c{c}}arski",
title = "Controlling the Correctness of Aggregation Operations
During Sessions of Interactive Analytic Queries",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "12:1--12:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3575812",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3575812",
abstract = "We present a comprehensive set of conditions and rules
to control the correctness of aggregation queries
within an interactive data analysis session. The goal
is to extend self-service data preparation and Business
Intelligence (BI) tools to automatically \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "12",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Skavantzos:2023:UCO,
author = "Philipp Skavantzos and Uwe Leck and Kaiqi Zhao and
Sebastian Link",
title = "Uniqueness Constraints for Object Stores",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "13:1--13:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3581758",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3581758",
abstract = "Object stores offer an increasingly popular choice for
data management and analytics. As with every data
model, managing the integrity of objects is fundamental
for data quality but also important for the efficiency
of update and query operations. In \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "13",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Smith:2023:LSA,
author = "Duncan Smith and Mark Elliot and Joseph W. Sakshaug",
title = "To Link or Synthesize? {An} Approach to Data Quality
Comparison",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "14:1--14:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3580487",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3580487",
abstract = "Linking administrative data to produce more
informative data for subsequent analysis has become an
increasingly common practice. However, there might be
concomitant risks of disclosing sensitive information
about individuals. One practice that reduces \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "14",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Ao:2023:TPR,
author = "Jing Ao and Zehui Cheng and Rada Chirkova and Phokion
G. Kolaitis",
title = "Theory and Practice of Relational-to-{RDF} Temporal
Data Exchange and Query Answering",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "15:1--15:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3591359",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3591359",
abstract = "We consider the problem of answering temporal queries
on RDF stores, in presence of atemporal RDFS domain
ontologies, of relational data sources that include
temporal information, and of rules that map the domain
information in the source schemas into the \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "15",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Timko:2023:IMD,
author = "Christina Timko and Malte Niederstadt and Naman Goel
and Boi Faltings",
title = "Incentive Mechanism Design for Responsible Data
Governance: a Large-scale Field Experiment",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "16:1--16:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592617",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3592617",
abstract = "A crucial building block of responsible artificial
intelligence is responsible data governance, including
data collection. Its importance is also underlined in
the latest EU regulations. The data should be of high
quality, foremost correct and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "16",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Simard:2023:MCD,
author = "Vanessa Simard and Mikael R{\"o}nnqvist and Luc Lebel
and Nadia Lehoux",
title = "A Method to Classify Data Quality for Decision Making
Under Uncertainty",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "17:1--17:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3592534",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3592534",
abstract = "Every decision-making process is subject to a certain
degree of uncertainty. In sectors where the outcomes of
the operations planned are uncertain and difficult to
control such as in forestry, data describing the
available resources can have a large \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "17",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Tawakuli:2023:EDB,
author = "Amal Tawakuli and Daniel Kaiser and Thomas Engel",
title = "Experience: Differentiating Between Isolated and
Sequence Missing Data",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "18:1--18:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3575809",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3575809",
abstract = "Missing data is one of the most persistent problems
found in data that hinders information and value
extraction. Handling missing data is a preprocessing
task that has been extensively studied by the research
community and remains an active research topic
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "18",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Srivastava:2023:ESI,
author = "Gautam Srivastava and Jerry Chun-Wei Lin and Zhihan
Lv",
title = "Editorial for the Special Issue on Quality Assessment
of Data Security",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "19:1--19:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3591360",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3591360",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "19",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Hoffpauir:2023:SEI,
author = "Kyle Hoffpauir and Jacob Simmons and Nikolas Schmidt
and Rachitha Pittala and Isaac Briggs and Shanmukha
Makani and Yaser Jararweh",
title = "A Survey on Edge Intelligence and Lightweight Machine
Learning Support for Future Applications and Services",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "20:1--20:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3581759",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3581759",
abstract = "As the number of devices connected to the Internet has
grown larger, so too has the intensity of the tasks
that these devices need to perform. Modern networks are
more frequently working to perform computationally
intensive tasks on low-power devices and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "20",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Singh:2023:IEC,
author = "Kedar Nath Singh and Amit Kumar Singh",
title = "An Improved Encryption-Compression-based Algorithm for
Securing Digital Images",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "21:1--21:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3532783",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3532783",
abstract = "Nowadays, there is an increasing tendency to upload
images to online platforms acting as information
carriers for various applications. Unfortunately, the
unauthorized utilization of such images is a serious
concern that has significantly impacted \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "21",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Supriya:2023:SSC,
author = "Y. Supriya and Thippa Reddy Gadekallu",
title = "A Survey on Soft Computing Techniques for Federated
Learning --- Applications, Challenges and Future
Directions",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "22:1--22:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3575810",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3575810",
abstract = "Federated Learning is a distributed,
privacy-preserving machine learning model that is
gaining more attention these days. Federated Learning
has a vast number of applications in different fields.
While being more popular, it also suffers some
drawbacks \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "22",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Chatterjee:2023:MRS,
author = "Kakali Chatterjee and Ashish Singh and Neha and Keping
Yu",
title = "A Multifactor Ring Signature based Authentication
Scheme for Quality Assessment of {IoMT} Environment in
{COVID-19} Scenario",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "23:1--23:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3575811",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3575811",
abstract = "The quality of the healthcare environment has become
an essential factor for healthcare users to access
quality services. Smart healthcare systems use the
Internet of Medical Things (IoMT) devices to capture
patients' health data for treatment or \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "23",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Kumar:2023:EEC,
author = "Gautam Kumar and Sambit Bakshi and Arun Kumar Sangaiah
and Pankaj Kumar Sa",
title = "Experimental Evaluation of Covariates Effects on
Periocular Biometrics: a Robust Security Assessment
Framework",
journal = j-JDIQ,
volume = "15",
number = "2",
pages = "24:1--24:??",
month = jun,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3579029",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Jul 1 13:31:36 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3579029",
abstract = "The growing integration of technology into our lives
has resulted in unprecedented amounts of data that are
being exchanged among devices in an Internet of Things
(IoT) environment. Authentication, identification, and
device heterogeneities are major \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "24",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Fadlallah:2023:CAB,
author = "Hadi Fadlallah and Rima Kilany and Houssein Dhayne and
Rami {El Haddad} and Rafiqul Haque and Yehia Taher and
Ali Jaber",
title = "Context-aware Big Data Quality Assessment: a Scoping
Review",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "25:1--25:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603707",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603707",
abstract = "The term data quality refers to measuring the fitness
of data regarding the intended usage. Poor data quality
leads to inadequate, inconsistent, and erroneous
decisions that could escalate the computational cost,
cause a decline in profits, and cause \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "25",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Irrera:2023:NCS,
author = "Ornella Irrera and Andrea Mannocci and Paolo Manghi
and Gianmaria Silvello",
title = "A Novel Curated Scholarly Graph Connecting Textual and
Data Publications",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "26:1--26:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597310",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3597310",
abstract = "In the last decade, scholarly graphs became
fundamental to storing and managing scholarly knowledge
in a structured and machine-readable way. Methods and
tools for discovery and impact assessment of science
rely on such graphs and their quality to serve
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "26",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Fadlallah:2023:BDB,
author = "Hadi Fadlallah and Rima Kilany and Houssein Dhayne and
Rami {El Haddad} and Rafiqul Haque and Yehia Taher and
Ali Jaber",
title = "{BIGQA}: Declarative Big Data Quality Assessment",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "27:1--27:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603706",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603706",
abstract = "In the big data domain, data quality assessment
operations are often complex and must be implementable
in a distributed and timely manner. This article tries
to generalize the quality assessment operations by
providing a new ISO-based declarative data \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "27",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Wenz:2023:CHD,
author = "Viola Wenz and Arno Kesper and Gabriele Taentzer",
title = "Clustering Heterogeneous Data Values for Data Quality
Analysis",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "28:1--28:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603710",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603710",
abstract = "Data is of high quality if it is fit for its intended
purpose. Data heterogeneity can be a major quality
problem, as quality aspects such as understandability
and consistency can be compromised. Heterogeneity of
data values is particularly common when \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "28",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Hofstede:2023:PDQ,
author = "Arthur H. M. ter Hofstede and Agnes Koschmider and
Andrea Marrella and Robert Andrews and Dominik A.
Fischer and Sareh Sadeghianasl and Moe Thandar Wynn and
Marco Comuzzi and Jochen {De Weerdt} and Kanika Goel
and Niels Martin and Pnina Soffer",
title = "Process-Data Quality: The True Frontier of Process
Mining",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "29:1--29:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3613247",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3613247",
abstract = "Since its emergence over two decades ago, process
mining has flourished as a discipline, with numerous
contributions to its theory, widespread practical
applications, and mature support by commercial tooling
environments. However, its potential for \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "29",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Chakraborty:2023:EMM,
author = "Chinmay Chakraborty and Mohammad Khosravi and Muhammad
Khurram Khan and Houbing Herbert Song",
title = "Editorial: Multimodality, Multidimensional
Representation, and Multimedia Quality Assessment
Toward Information Quality in Social {Web} of Things",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "30:1--30:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3625102",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3625102",
abstract = "This editorial summarizes the content of the
collection on Multimodality, Multidimensional
Representation, and Multimedia Quality Assessment
Toward Information Quality in Social Web of Things for
the Journal of Data and Information Quality.",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "30",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Aume:2023:MSD,
author = "Cameron Aume and Shantanu Pal and Alireza Jolfaei and
Subhas Mukhopadhyay",
title = "Multimodal Social Data Analytics on the Design and
Implementation of an {EEG}-Mechatronic System
Interface",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "31:1--31:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597306",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3597306",
abstract = "The devices that can read Electroencephalography (EEG)
signals have been widely used for Brain-Computer
Interfaces (BCIs). Popularity in the field of BCIs has
increased in recent years with the development of
several consumer-grade EEG devices that can \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "31",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Jing:2023:SCT,
author = "Yang Jing and Ma Haowei and Arshiya S. Ansari and G.
Sucharitha and Batyrkhan Omarov and Sandeep Kumar and
Mohammad Sajid Mohammadi and Khaled A. Z. Alyamani",
title = "Soft Computing Techniques for Detecting Cyberbullying
in Social Multimedia Data",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "32:1--32:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3604617",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3604617",
abstract = "Cyberbullying is a form of abuse, manipulation, or
humiliation directed against a single person via the
Internet. CB makes use of nasty Internet comments and
remarks. It occurs when someone publicly mocks,
insults, slanders, criticizes, or mocks another
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "32",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Matrouk:2023:DLB,
author = "Khaled Matrouk and {Srikanth V} and Sumit Kumar and
Mohit Kumar Bhadla and Mirza Sabirov and Mohamed J.
Saadh",
title = "Deep Learning-based Dynamic User Alignment in Social
Networks",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "33:1--33:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603711",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603711",
abstract = "Academics and businesses are paying intense attention
to social network alignment, which centers various
social networks around their shared members. All
studies to date treat the social network as static and
ignore its innate dynamism. In reality, an \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "33",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Martin:2023:FBR,
author = "R. John Martin and Rajvardhan Oak and Mukesh Soni and
V. Mahalakshmi and Arsalan Muhammad Soomar and Anjali
Joshi",
title = "Fusion-based Representation Learning Model for
Multimode User-generated Social Network Content",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "34:1--34:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603712",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603712",
abstract = "As mobile networks and APPs are developed,
user-generated content (UGC), which includes
multi-source heterogeneous data like user reviews,
tags, scores, images, and videos, has become an
essential basis for improving the quality of
personalized services. \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "34",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Attar:2023:JIM,
author = "Hani Attar",
title = "Joint {IoT\slash ML} Platforms for Smart Societies and
Environments: a Review on Multimodal Information-Based
Learning for Safety and Security",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "35:1--35:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603713",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603713",
abstract = "The application of the Internet of Things (IoT) is
highly expected to have comprehensive economic,
business, and societal implications for our smart
lives; indeed, IoT technologies play an essential role
in creating a variety of smart applications that
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "35",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Al-Qerem:2023:TSC,
author = "Ahmad Al-Qerem and Ali Mohd Ali and Shadi Nashwan and
Mohammad Alauthman and Ala Hamarsheh and Ahmad Nabot
and Issam Jibreen",
title = "Transactional Services for Concurrent Mobile Agents
over Edge\slash Cloud Computing-Assisted Social
{Internet of Things}",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "36:1--36:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603714",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603714",
abstract = "The Web of Things (WoT) is a concept that aims to
create a network of intelligent devices capable of
remote monitoring, service provisioning, and control.
Virtual and Physical Internet of Things (IoT) gateways
facilitate communication, processing, and \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "36",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Al-Qerem:2023:SGM,
author = "Ahmad Al-Qerem and Ali Mohd Ali and Hani Attar and
Shadi Nashwan and Lianyong Qi and Mohammad Kazem
Moghimi and Ahmed Solyman",
title = "Synthetic Generation of Multidimensional Data to
Improve Classification Model Validity",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "37:1--37:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3603715",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3603715",
abstract = "This article aims to compare Generative Adversarial
Network (GAN) models and feature selection methods for
generating synthetic data in order to improve the
validity of a classification model. The synthetic data
generation technique involves generating \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "37",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Alzubi:2023:MDL,
author = "Ahmad Alzu'bi and Lojin Bani Younis and Abdelrahman
Abuarqoub and Mohammad Hammoudeh",
title = "Multimodal Deep Learning with Discriminant Descriptors
for Offensive Memes Detection",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "38:1--38:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597308",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3597308",
abstract = "A meme is a visual representation that illustrates a
thought or concept. Memes are spreading steadily among
people in this era of rapidly expanding social media
platforms, and they are becoming increasingly popular
forms of expression. In the domain of \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "38",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Varedi:2023:NFS,
author = "Erfan Varedi and Reza Boostani",
title = "A Novel Feature Selection Method for Risk Management
in High-Dimensional Time Series of Cryptocurrency
Market",
journal = j-JDIQ,
volume = "15",
number = "3",
pages = "39:1--39:??",
month = sep,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3597309",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Mon Oct 2 15:49:58 MDT 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3597309",
abstract = "In this study, a novel approach for feature selection
has been presented in order to overcome the challenge
of classifying positive and negative risk prediction in
the cryptocurrency market, which contains high
fluctuation. This approach is based on \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "39",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Console:2023:ESI,
author = "Marco Console and Maurizio Lenzerini",
title = "Editorial: Special Issue on Quality Aspects of Data
Preparation",
journal = j-JDIQ,
volume = "15",
number = "4",
pages = "40:1--40:??",
month = dec,
year = "2023",
CODEN = "????",
DOI = "https://doi.org/10.1145/3626461",
ISSN = "1936-1955",
ISSN-L = "1936-1955",
bibdate = "Sat Dec 23 05:24:09 MST 2023",
bibsource = "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
URL = "https://dl.acm.org/doi/10.1145/3626461",
abstract = "This Special Issue of the Journal of Data and
Information Quality (JDIQ) contains novel theoretical
and methodological contributions as well as
state-of-the-art reviews and research perspectives on
quality aspects of data preparation. In this editorial,
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "J. Data Inf. Qual.",
articleno = "40",
fjournal = "Journal of Data and Information Quality (JDIQ)",
journal-URL = "https://dl.acm.org/loi/jdiq",
}
@Article{Lambrix:2023:CDO,
  author =       "Patrick Lambrix",
  title =        "Completing and Debugging Ontologies: State-of-the-art
                 and Challenges in Repairing Ontologies",
  journal =      j-JDIQ,
  volume =       "15",
  number =       "4",
  pages =        "41:1--41:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597304",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Sat Dec 23 05:24:09 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597304",
  abstract =     "As semantically enabled applications require
                 high-quality ontologies, developing and maintaining
                 ontologies that are as correct and complete as possible
                 is an important although difficult task in ontology
                 engineering. A key task is ontology debugging and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "41",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Bono:2023:PDD,
  author =       "Carlo A. Bono and Cinzia Cappiello and Barbara Pernici
                 and Edoardo Ramalli and Monica Vitali",
  title =        "Pipeline Design for Data Preparation for Social Media
                 Analysis",
  journal =      j-JDIQ,
  volume =       "15",
  number =       "4",
  pages =        "42:1--42:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597305",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Sat Dec 23 05:24:09 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597305",
  abstract =     "In a data-driven culture, in which analytics
                 applications are the main resources for supporting
                 decision-making, the use of high-quality datasets is
                 mandatory to minimize errors and risks. For this
                 reason, data analysis tasks need to be preceded by a
                 data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "42",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
%%% NOTE(review): the title previously repeated itself verbatim after a
%%% colon; the duplicated half was dropped. Confirm against the DOI page.
@Article{Krasikov:2023:MSA,
  author =       "Pavel Krasikov and Christine Legner",
  title =        "A Method to Screen, Assess, and Prepare Open Data for
                 Use",
  journal =      j-JDIQ,
  volume =       "15",
  number =       "4",
  pages =        "43:1--43:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3603708",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Sat Dec 23 05:24:09 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3603708",
  abstract =     "Open data's value-creating capabilities and innovation
                 potential are widely recognized, resulting in a notable
                 increase in the number of published open data sources.
                 A crucial challenge for companies intending to leverage
                 open data is to identify suitable \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "43",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Patel:2023:DCA,
  author =       "Hima Patel and Shanmukha Guttula and Nitin Gupta and
                 Sandeep Hans and Ruhi Sharma Mittal and Lokesh N.",
  title =        "A Data-centric {AI} Framework for Automating
                 Exploratory Data Analysis and Data Quality Tasks",
  journal =      j-JDIQ,
  volume =       "15",
  number =       "4",
  pages =        "44:1--44:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3603709",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Sat Dec 23 05:24:09 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3603709",
  abstract =     "Democratisation of machine learning (ML) has been an
                 important theme in the research community for the last
                 several years with notable progress made by the
                 model-building community with automated machine
                 learning models. However, data play a central role
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "44",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Vasto-Terrientes:2023:EDM,
  author =       "Luis {Del Vasto-Terrientes}",
  title =        "Experience: Data Management for Delivering {COVID-19}
                 Relief in {Panama}",
  journal =      j-JDIQ,
  volume =       "15",
  number =       "4",
  pages =        "45:1--45:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3623511",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Sat Dec 23 05:24:09 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3623511",
  abstract =     "A data-driven public sector recognizes data as a key
                 element for implementing policies based on evidence.
                 The open data movement has been a major catalyst for
                 elevating data to a privileged position in many
                 governments around the globe. In Panama, open
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "45",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Naumann:2024:E,
  author =       "Felix Naumann",
  title =        "Editorial",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3650728",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3650728",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
%%% NOTE(review): the date range in the title had stray math-mode
%%% delimiters around 2017 and a single hyphen. It is now plain text with
%%% an en-dash, and the month names are brace-protected against downcasing.
@Article{Catarci:2024:ECJ,
  author =       "Tiziana Catarci",
  title =        "{Editor}-in-{Chief} ({June} 2017--{November} 2023)
                 Farewell Report",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3651229",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3651229",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Demartini:2024:ESI,
  author =       "Gianluca Demartini and Shazia Sadiq and Jie Yang",
  title =        "Editorial: Special Issue on Human in the Loop Data
                 Curation",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3650209",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3650209",
  abstract =     "This Special Issue of the Journal of Data and
                 Information Quality (JDIQ) contains novel theoretical
                 and methodological contributions on data curation
                 involving humans in the loop. In this editorial, we
                 summarize the scope of the issue and briefly describe
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Tsaneva:2024:EHL,
  author =       "Stefani Tsaneva and Marta Sabou",
  title =        "Enhancing Human-in-the-Loop Ontology Curation Results
                 through Task Design",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3626960",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3626960",
  abstract =     "The success of artificial intelligence (AI)
                 applications is heavily dependent on the quality of
                 data they rely on. Thus, data curation, dealing with
                 cleaning, organising, and managing data, has become a
                 significant research area to be addressed. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Breuer:2024:VSU,
  author =       "Timo Breuer and Norbert Fuhr and Philipp Schaer",
  title =        "Validating Synthetic Usage Data in Living Lab
                 Environments",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "5:1--5:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3623640",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3623640",
  abstract =     "Evaluating retrieval performance without editorial
                 relevance judgments is challenging, but instead, user
                 interactions can be used as relevance signals. Living
                 labs offer a way for small-scale platforms to validate
                 information retrieval systems with real \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
%%% NOTE(review): the abstract excerpt broke off mid-word with a stray
%%% period; the dangling fragment was completed to `high-quality'.
%%% Confirm the wording against the DOI page.
@Article{Pereira:2024:CSU,
  author =       "Jo{\~a}o L. M. Pereira and Manuel J. Fonseca and
                 Ant{\'o}nia Lopes and Helena Galhardas",
  title =        "{Cleenex}: Support for User Involvement during an
                 Iterative Data Cleaning Process",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "6:1--6:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3648476",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3648476",
  abstract =     "The existence of large amounts of data increases the
                 probability of occurring data quality problems. A data
                 cleaning process that corrects these problems is
                 usually an iterative process, because it may need to be
                 re-executed and refined to produce high-quality
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Deunf:2024:DQA,
  author =       "Julian {Le Deunf} and Arwa Khannoussi and Laurent
                 Lecornu and Patrick Meyer and John Puentes",
  title =        "Data Quality Assessment through a Preference Model",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "7:1--7:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3632407",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3632407",
  abstract =     "Evaluating the quality of data is a problem of a
                 multi-dimensional nature and quite frequently depends
                 on the perspective of an expected use or final purpose
                 of the data. Numerous works have explored the
                 well-known specification of data quality dimensions
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Geeganage:2024:TEG,
  author =       "Dakshi Tharanga Kapugama Geeganage and Moe Thandar
                 Wynn and Arthur H. M. ter Hofstede",
  title =        "{Text2EL+}: Expert Guided Event Log Enrichment Using
                 Unstructured Text",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "8:1--8:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3640018",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3640018",
  abstract =     "Through the application of process mining, business
                 processes can be improved on the basis of process
                 execution data captured in event logs. Naturally, the
                 quality of this data determines the quality of the
                 improvement recommendations. Improving data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Backes:2024:CCS,
  author =       "Tobias Backes and Stefan Dietze",
  title =        "Connected Components for Scaling Partial-order
                 Blocking to Billion Entities",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "9:1--9:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3646553",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3646553",
  abstract =     "In entity resolution, blocking pre-partitions data for
                 further processing by more expensive methods. Two
                 entity mentions are in the same block if they share
                 identical or related blocking-keys. Previous work has
                 sometimes related blocking keys by grouping \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "9",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Richard:2024:AEA,
  author =       "Guy-Junior Richard and J{\'e}r{\^o}me Habonneau and
                 Didier Gu{\'e}riot and Jean-Marc {Le Caillec}",
  title =        "{AI} Explainability and Acceptance: a Case Study for
                 Underwater Mine Hunting",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "1",
  pages =        "10:1--10:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3635113",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Mon Mar 25 11:29:07 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3635113",
  abstract =     "In critical operational context such as Mine Warfare,
                 Automatic Target Recognition (ATR) algorithms are still
                 hardly accepted. The complexity of their
                 decision-making hampers understanding of predictions
                 despite performances approaching human expert ones.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "10",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Li:2024:ALD,
  author =       "Na Li and Yiyang Qi and Chaoran Li and Zhiming Zhao",
  title =        "Active Learning for Data Quality Control: a Survey",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "2",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3663369",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Jun 27 06:15:46 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3663369",
  abstract =     "Data quality plays a vital role in scientific research
                 and decision-making across industries. Thus, it is
                 crucial to incorporate the data quality control (DQC)
                 process, which comprises various actions and operations
                 to detect and correct data errors. The \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "11",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Mecca:2024:BLR,
  author =       "Giansalvatore Mecca and Paolo Papotti and Donatello
                 Santoro and Enzo Veltri",
  title =        "{BUNNI}: Learning Repair Actions in Rule-driven Data
                 Cleaning",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "2",
  pages =        "12:1--12:??",
  month =        jun,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3665930",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Jun 27 06:15:46 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3665930",
  abstract =     "In this work, we address the challenging and open
                 problem of involving non-expert users in the data
                 repairing problem as first-class citizens. Despite a
                 large number of proposals that have been devoted to
                 cleaning data from the point of view of expert
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "12",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Bachinger:2024:DVU,
  author =       "Florian Bachinger and Lisa Ehrlinger and Gabriel
                 Kronberger and Wolfram W{\"o}ss",
  title =        "Data Validation Utilizing Expert Knowledge and Shape
                 Constraints",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "2",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3661826",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Jun 27 06:15:46 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3661826",
  abstract =     "Data validation is a primary concern in any
                 data-driven application, as undetected data errors may
                 negatively affect machine learning models and lead to
                 suboptimal decisions. Data quality issues are usually
                 detected manually by experts, which becomes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "13",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Stenger:2024:TCS,
  author =       "Michael Stenger and Andr{\'e} Bauer and Thomas Prantl
                 and Robert Leppich and Nathaniel Hudson and Kyle Chard
                 and Ian Foster and Samuel Kounev",
  title =        "Thinking in Categories: a Survey on Assessing the
                 Quality for Time Series Synthesis",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "2",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3666006",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Jun 27 06:15:46 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3666006",
  abstract =     "Time series data are widely used and provide a wealth
                 of information for countless applications. However,
                 some applications are faced with a limited amount of
                 data, or the data cannot be used due to confidentiality
                 concerns. To overcome these obstacles, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "14",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Chuprov:2024:DQB,
  author =       "Sergei Chuprov and Raman Zatsarenko and Leon Reznik
                 and Igor Khokhlov",
  title =        "Data Quality Based Intelligent Instrument Selection
                 with Security Integration",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "3",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3695770",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Oct 10 06:13:03 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3695770",
  abstract =     "We propose a novel Data Quality with Security (DQS)
                 integrated instrumentation selection approach that
                 facilitates aggregation of multi-modal data from
                 heterogeneous sources. As our major contribution, we
                 develop a framework that incorporates multiple
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "15",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
%%% Title is stored in Title Case, matching the other entries in this
%%% file; bibliography styles can downcase it but cannot restore capitals.
@Article{Belgacem:2024:AAD,
  author =       "Hichem Belgacem and Xiaochen Li and Domenico Bianculli
                 and Lionel Briand",
  title =        "Automated Anomaly Detection for Categorical Data by
                 Repurposing a Form Filling Recommender System",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "3",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3696110",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Oct 10 06:13:03 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3696110",
  abstract =     "Data quality is crucial in modern software systems,
                 like data-driven decision support systems. However,
                 data quality is affected by data anomalies, which
                 represent instances that deviate from most of the data.
                 These anomalies affect the reliability and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "16",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
%%% The preposition `from' in the title is lowercased, matching how other
%%% titles in this file treat prepositions (e.g. `through', `for', `with').
@Article{Peters:2024:GEM,
  author =       "Heinrich Peters and Alireza Hashemi and James Rae",
  title =        "Generalizable Error Modeling for Human Data
                 Annotation: Evidence from an Industry-Scale Search Data
                 Annotation Program",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "3",
  pages =        "17:1--17:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3688394",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Oct 10 06:13:03 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3688394",
  abstract =     "Machine learning (ML) and artificial intelligence (AI)
                 systems rely heavily on human-annotated data for
                 training and evaluation. A major challenge in this
                 context is the occurrence of annotation errors, as
                 their effects can degrade model performance. This
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "17",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Alzahrani:2024:ECA,
  author =       "Naif Alzahrani and Jacek Ca{\l}a and Paolo Missier",
  title =        "Experience: a Comparative Analysis of Multivariate
                 Time-Series Generative Models: a Case Study on Human
                 Activity Data",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "3",
  pages =        "18:1--18:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3688393",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Oct 10 06:13:03 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3688393",
  abstract =     "Human activity recognition (HAR) is an active research
                 field that has seen great success in recent years due
                 to advances in sensory data collection methods and
                 activity recognition systems. Deep artificial
                 intelligence (AI) models have contributed to the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "18",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Serra:2024:UCD,
  author =       "Flavia Serra and Ver{\'o}nika Peralta and Adriana
                 Marotta and Patrick Marcel",
  title =        "Use of Context in Data Quality Management: a
                 Systematic Literature Review",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "3",
  pages =        "19:1--19:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3672082",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Thu Oct 10 06:13:03 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3672082",
  abstract =     "The importance of context in data quality (DQ) was
                 shown many years ago and nowadays is widely accepted.
                 Early approaches and surveys defined DQ as fitness for
                 use and showed the influence of context on DQ. This
                 article presents a Systematic Literature \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "19",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Khomh:2024:ESI,
  author =       "Foutse Khomh and Andreas Metzger and Phu Nguyen and
                 Sagar Sen",
  title =        "Editorial: Special Issue on Software Engineering and
                 {AI} for Data Quality",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "4",
  pages =        "20:1--20:??",
  month =        dec,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3708503",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Tue Dec 24 06:42:37 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3708503",
  abstract =     "This editorial summarizes the content of the Special
                 Issue on Software Engineering and AI for Data Quality
                 of the Journal of Data and Information Quality
                 (JDIQ).",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "20",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Golendukhina:2024:CCI,
  author =       "Valentina Golendukhina and Harald Foidl and Daniel
                 H{\"o}rl and Michael Felderer",
  title =        "A Catalog of Consumer {IoT} Device Characteristics for
                 Data Quality Estimation",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "4",
  pages =        "21:1--21:??",
  month =        dec,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3639708",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Tue Dec 24 06:42:37 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3639708",
  abstract =     "The Internet of Things (IoT) is rapidly growing and
                 spreading across different markets, including the
                 customer market and consumer IoT (CIoT). The large
                 variety of gadgets and their availability makes CIoT
                 more and more influential, especially in the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "21",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Begoli:2024:CDP,
  author =       "Edmon Begoli and Maria Mahbub and Linsey Passarella
                 and Sudarshan Srinivasan",
  title =        "A Compound Data Poisoning Technique with Significant
                 Adversarial Effects on Transformer-based Sentiment
                 Classification Tasks",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "4",
  pages =        "22:1--22:??",
  month =        dec,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3705897",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Tue Dec 24 06:42:37 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3705897",
  abstract =     "Transformer-based models have demonstrated much
                 success in various natural language processing tasks.
                 However, they are often vulnerable to adversarial
                 attacks, such as data poisoning, which can
                 intentionally fool the model into generating incorrect
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "22",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
%%% Title is stored in Title Case, matching the other entries in this
%%% file; bibliography styles can downcase it but cannot restore capitals.
@Article{Valeriano:2024:UPM,
  author =       "Maria Gabriela Valeriano and Ana Matran-Fernandez and
                 Carlos Kiffer and Ana Carolina Lorena",
  title =        "Understanding the Performance of Machine Learning
                 Models from Data- to Patient-Level",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "4",
  pages =        "23:1--23:??",
  month =        dec,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3687267",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Tue Dec 24 06:42:37 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3687267",
  abstract =     "Machine Learning (ML) models have the potential to
                 support decision-making in healthcare by grasping
                 complex patterns within data. However, decisions in
                 this domain are sensitive and require active
                 involvement of domain specialists with deep knowledge
                 of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "23",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Jesus:2024:UAE,
  author =       "Rui Filipe Ribeiro Jesus and Ana Rodrigues and Carlos
                 Costa",
  title =        "Unlocking {AutoML}: Enhancing Data with Deep Learning
                 Algorithms for Medical Imaging",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "4",
  pages =        "24:1--24:??",
  month =        dec,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3705896",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Tue Dec 24 06:42:37 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3705896",
  abstract =     "Deep learning algorithms have become increasingly
                 popular over the years, having proved their efficiency
                 in input-output functions for distinct types of data.
                 This technology is particularly useful in medical
                 imaging, where complex image structures often
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "24",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}
@Article{Truong:2024:TPR,
  author =       "Hong-Linh Truong and Ngoc Nhu Trang Nguyen",
  title =        "{TENSAI} --- Practical and Responsible Observability
                 for Data Quality-aware Large-scale Analytics",
  journal =      j-JDIQ,
  volume =       "16",
  number =       "4",
  pages =        "25:1--25:??",
  month =        dec,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3708014",
  ISSN =         "1936-1955",
  ISSN-L =       "1936-1955",
  bibdate =      "Tue Dec 24 06:42:37 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3708014",
  abstract =     "Given a large-scale mobile network with a variety of
                 equipment and radio access network technologies for an
                 approximate 20 million subscribers, there are many
                 types of data that can be used for big data analytics
                 and machine learning (ML) tasks for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Data Inf. Qual.",
  articleno =    "25",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "https://dl.acm.org/loi/jdiq",
}